package org.broadinstitute.hellbender.tools.walkers.annotator;

import com.google.common.annotations.VisibleForTesting;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.vcf.VCFConstants;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.vcf.VCFStandardHeaderLines;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotation;
import org.broadinstitute.hellbender.tools.walkers.annotator.allelespecific.ReducibleAnnotationData;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
import org.broadinstitute.hellbender.utils.variant.VariantContextGetters;

import java.util.*;


/**
 * Root Mean Square of the mapping quality of reads across all samples.
 *
 * <p>This annotation provides an estimation of the overall mapping quality of reads supporting a variant call,
 * averaged over all samples in a cohort.</p>
 *
 * <p>The raw data format for this annotation consists of a list of two entries: the sum of the squared mapping
 * qualities and the number of reads across variant (not homRef) genotypes.</p>
 *
 * <h3>Statistical notes</h3>
 * <p>The root mean square combines the average and the spread of the mapping qualities: the squared RMS equals the
 * squared mean plus the variance of the mapping qualities.</p>
 *
 * <h3>Related annotations</h3>
 * <ul>
 *     <li><b><a href="https://www.broadinstitute.org/gatk/guide/tooldocs/org_broadinstitute_gatk_tools_walkers_annotator_MappingQualityRankSumTest.php">MappingQualityRankSumTest</a></b> compares the mapping quality of reads supporting the REF and ALT alleles.</li>
 * </ul>
 *
 */
@DocumentedFeature(groupName=HelpConstants.DOC_CAT_ANNOTATORS, groupSummary=HelpConstants.DOC_CAT_ANNOTATORS_SUMMARY, summary="Root mean square of the mapping quality of reads across all samples (MQ)")
public final class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation, ReducibleAnnotation {
    private static final OneShotLogger logger = new OneShotLogger(RMSMappingQuality.class);
    private static final RMSMappingQuality instance = new RMSMappingQuality();
    private static final int NUM_LIST_ENTRIES = 2;
    private static final int SUM_OF_SQUARES_INDEX = 0;
    private static final int TOTAL_DEPTH_INDEX = 1;
    private static final String OUTPUT_PRECISION = "%.2f";
    public static final String RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT = "allow-old-rms-mapping-quality-annotation-data";

    @Argument(fullName = RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT, doc="Override to allow old RMSMappingQuality annotated VCFs to function", optional=true)
    public boolean allowOlderRawKeyValues = false;

    @Override
    public String getPrimaryRawKey() { return GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY; }   //new key for the two-value MQ data to prevent version mismatch catastrophes

    /**
     * @return true if annotation has secondary raw keys
     */
    @Override
    public boolean hasSecondaryRawKeys() {
        return false;
    }

    /**
     * Get additional raw key strings that are not the primary key
     *
     * @return may be null
     */
    @Override
    public List<String> getSecondaryRawKeys() {
        return null;
    }

    public static String getDeprecatedRawKeyName() { return GATKVCFConstants.RAW_RMS_MAPPING_QUALITY_DEPRECATED;}   //old key that used the janky depth estimation method

    @Override
    public List<String> getKeyNames() {
        final List<String> allKeys = new ArrayList<>();
        allKeys.add(VCFConstants.RMS_MAPPING_QUALITY_KEY);
        allKeys.addAll(getRawKeyNames());
        return allKeys;
    }

    @Override
    public List<VCFInfoHeaderLine> getDescriptions() {
        return Collections.singletonList(VCFStandardHeaderLines.getInfoLine(getKeyNames().get(0)));
    }

    @Override
    public List<VCFInfoHeaderLine> getRawDescriptions() {
        final List<VCFInfoHeaderLine> lines = new ArrayList<>(1);
        for (final String rawKey : getRawKeyNames()) {
            lines.add(GATKVCFHeaderLines.getInfoLine(rawKey));
        }
        return lines;
    }

    /**
     * Generate the raw data necessary to calculate the annotation. Raw data is the final endpoint for gVCFs.
     */
    @Override
    public Map<String, Object> annotateRawData(final ReferenceContext ref,
                                               final VariantContext vc,
                                               final AlleleLikelihoods<GATKRead, Allele> likelihoods){
        Utils.nonNull(vc);
        if (likelihoods == null || likelihoods.evidenceCount() == 0) {
            return Collections.emptyMap();
        }

        final Map<String, Object> annotations = new HashMap<>();
        final ReducibleAnnotationData<List<Long>> myData = new ReducibleAnnotationData<>(null);
        calculateRawData(vc, likelihoods, myData);
        final String annotationString = makeRawAnnotationString(vc.getAlleles(), myData.getAttributeMap());
        annotations.put(getPrimaryRawKey(), annotationString);
        return annotations;
    }

    @Override
    @SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
    public Map<String, Object> combineRawData(final List<Allele> vcAlleles, final List<ReducibleAnnotationData<?>> annotationList) {
        //VC already contains merged alleles from ReferenceConfidenceVariantContextMerger
        ReducibleAnnotationData combinedData = new ReducibleAnnotationData(null);

        for (final ReducibleAnnotationData currentValue : annotationList) {
            parseRawDataString(currentValue);
            combineAttributeMap(currentValue, combinedData);
        }
        final Map<String, Object> annotations = new HashMap<>();
        String annotationString = makeRawAnnotationString(vcAlleles, combinedData.getAttributeMap());
        annotations.put(getPrimaryRawKey(), annotationString);
        return annotations;
    }

    //not allele-specific: the combined data lives under the NO_CALL key, so vcAlleles is unused but kept for interface symmetry
    private String makeRawAnnotationString(final List<Allele> vcAlleles, final Map<Allele, List<Long>> perAlleleData) {
        return String.format("%d,%d", perAlleleData.get(Allele.NO_CALL).get(SUM_OF_SQUARES_INDEX), perAlleleData.get(Allele.NO_CALL).get(TOTAL_DEPTH_INDEX));
    }

    @Override
    @SuppressWarnings({"unchecked", "rawtypes"})//FIXME generics here blow up
    public Map<String, Object> finalizeRawData(final VariantContext vc, final VariantContext originalVC) {
        String rawMQdata;
        if (vc.hasAttribute(getPrimaryRawKey())) {
            rawMQdata = vc.getAttributeAsString(getPrimaryRawKey(), null);
        }
        else if (vc.hasAttribute(getDeprecatedRawKeyName())) {
            if (!allowOlderRawKeyValues) {
                throw new UserException.BadInput("Presence of '-"+getDeprecatedRawKeyName()+"' annotation is detected. This GATK version expects key "
                        + getPrimaryRawKey() + " with a tuple of sum of squared MQ values and total reads over variant "
                        + "genotypes as the value. This could indicate that the provided input was produced with an older version of GATK. " +
                        "Use the argument '--"+RMS_MAPPING_QUALITY_OLD_BEHAVIOR_OVERRIDE_ARGUMENT+"' to override and attempt the deprecated MQ calculation. There " +
                        "may be differences in how newer GATK versions calculate DP and MQ that may result in worse MQ results. Use at your own risk.");
            }

            rawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
            //the original version of ReblockGVCF produces a different MQ format -- try to handle that gracefully here just in case those files go through GenotypeGVCFs
            if (vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
                logger.warn("Presence of " + GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED + " key indicates that this tool may be running on an older output of ReblockGVCF " +
                        "that may not have compatible annotations with this GATK version. Attempting to reformat MQ data.");
                final String rawMQdepth = vc.getAttributeAsString(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED,null);
                if (rawMQdepth == null) {
                    throw new UserException.BadInput("MQ annotation data is not properly formatted. This version expects a " +
                            "long tuple of sum of squared MQ values and total reads over variant genotypes.");
                }
                rawMQdata = Math.round(Double.parseDouble(rawMQdata)) + "," + rawMQdepth;  //deprecated format was double so it needs to be converted to long
            }
            else {
                logger.warn("MQ annotation data is not properly formatted. This GATK version expects key "
                        + getPrimaryRawKey() + " with a tuple of sum of squared MQ values and total reads over variant "
                        + "genotypes as the value. Attempting to use deprecated MQ calculation.");
                final long numOfReads = getNumOfReads(vc, null);
                rawMQdata = Math.round(Double.parseDouble(rawMQdata)) + "," + numOfReads;  //deprecated format was double so it needs to be converted to long
            }
        }
        else {
            return new HashMap<>();
        }
        if (rawMQdata == null) {
            return new HashMap<>();
        }

        ReducibleAnnotationData<List<Long>> myData = new ReducibleAnnotationData<>(rawMQdata);
        parseRawDataString(myData);

        final String annotationString = makeFinalizedAnnotationString(myData.getAttribute(Allele.NO_CALL).get(TOTAL_DEPTH_INDEX), myData.getAttribute(Allele.NO_CALL).get(SUM_OF_SQUARES_INDEX));
        return Collections.singletonMap(getKeyNames().get(0), annotationString);
    }

    private String makeFinalizedAnnotationString(final long numOfReads, final long sumOfSquaredMQs) {
        return String.format(OUTPUT_PRECISION, Math.sqrt(sumOfSquaredMQs/(double)numOfReads));
    }


    /**
     * Combine the long tuples: sum of squared mapping quality and read counts over variant genotypes
     * Since this is not an allele-specific annotation, store the result with the NO_CALL allele key.
     * @param toAdd new data
     * @param combined passed in as previous data, modified to store the combined result
     */
    private void combineAttributeMap(ReducibleAnnotationData<List<Long>> toAdd, ReducibleAnnotationData<List<Long>> combined) {
        if (combined.getAttribute(Allele.NO_CALL) != null) {
            combined.putAttribute(Allele.NO_CALL, Arrays.asList(combined.getAttribute(Allele.NO_CALL).get(SUM_OF_SQUARES_INDEX) + toAdd.getAttribute(Allele.NO_CALL).get(SUM_OF_SQUARES_INDEX),
                    combined.getAttribute(Allele.NO_CALL).get(TOTAL_DEPTH_INDEX) + toAdd.getAttribute(Allele.NO_CALL).get(TOTAL_DEPTH_INDEX)));
        } else {
            combined.putAttribute(Allele.NO_CALL, toAdd.getAttribute(Allele.NO_CALL));
        }
    }

    @SuppressWarnings({"unchecked", "rawtypes"})//FIXME
    private void calculateRawData(final VariantContext vc,
                                  final AlleleLikelihoods<GATKRead, Allele> likelihoods,
                                  final ReducibleAnnotationData rawAnnotations){
        //GATK3.5 had a double, but change this to a long for the tuple representation (square sum, read count)
        long squareSum = 0;
        long numReadsUsed = 0;
        for (int i = 0; i < likelihoods.numberOfSamples(); i++) {
            for (final GATKRead read : likelihoods.sampleEvidence(i)) {
                long mq = read.getMappingQuality();
                if (mq != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
                    squareSum += mq * mq;
                    numReadsUsed++;
                }
            }
        }
        rawAnnotations.putAttribute(Allele.NO_CALL, Arrays.asList(squareSum, numReadsUsed));
    }

    @Override
    public Map<String, Object> annotate(final ReferenceContext ref,
                                        final VariantContext vc,
                                        final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        Utils.nonNull(vc);
        if (likelihoods == null || likelihoods.evidenceCount() < 1 ) {
            return new HashMap<>();
        }

        final Map<String, Object> annotations = new HashMap<>();
        final ReducibleAnnotationData<List<Long>> myData = new ReducibleAnnotationData<>(null);
        calculateRawData(vc, likelihoods, myData);
        final String annotationString = makeFinalizedAnnotationString(myData.getAttribute(Allele.NO_CALL).get(TOTAL_DEPTH_INDEX), myData.getAttribute(Allele.NO_CALL).get(SUM_OF_SQUARES_INDEX));
        annotations.put(getKeyNames().get(0), annotationString);
        return annotations;
    }

    @VisibleForTesting
    static String formattedValue(double rms) {
        return String.format(OUTPUT_PRECISION, rms);
    }

    /**
     * converts {@link GATKVCFConstants#RAW_MAPPING_QUALITY_WITH_DEPTH_KEY} into {@link VCFConstants#RMS_MAPPING_QUALITY_KEY} annotation if present
     * NOTE: this is currently only used by HaplotypeCaller in VCF mode and GnarlyGenotyper
     * @param vc which potentially contains rawMQ
     * @return if vc contained {@link GATKVCFConstants#RAW_MAPPING_QUALITY_WITH_DEPTH_KEY} it will be replaced with {@link VCFConstants#RMS_MAPPING_QUALITY_KEY}
     *         otherwise return the original vc
     */
    public VariantContext finalizeRawMQ(final VariantContext vc) {
        final String rawMQdata = vc.getAttributeAsString(getPrimaryRawKey(), null);
        if (rawMQdata == null) {
            if (!vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
                return vc;
            }
            //MQ_DP is an undocumented hack for the Gnarly Pipeline -- improved version uses RAW_MQ_and_DP tuple format (see #4969)
            final int numOfReads = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, getNumOfReads(vc));
            //NOTE(review): assumes the deprecated RAW_MQ key accompanies MQ_DP; a missing key would NPE inside parseDeprecatedRawDataString -- confirm upstream guarantees
            final String deprecatedRawMQdata = vc.getAttributeAsString(getDeprecatedRawKeyName(), null);
            final double squareSum = parseDeprecatedRawDataString(deprecatedRawMQdata);
            final double rms = Math.sqrt(squareSum / (double)numOfReads);
            final String finalizedRMSMappingQuality = formattedValue(rms);
            return new VariantContextBuilder(vc)
                    .rmAttribute(getDeprecatedRawKeyName())   //some old GVCFs that were reblocked for gnomAD have both
                    .rmAttributes(getRawKeyNames())
                    .attribute(getKeyNames().get(0), finalizedRMSMappingQuality)
                    .make();
        } else {
            final List<Long> SSQMQandDP = parseRawDataString(rawMQdata);
            final double rms = Math.sqrt(SSQMQandDP.get(SUM_OF_SQUARES_INDEX) / (double)SSQMQandDP.get(TOTAL_DEPTH_INDEX));
            final String finalizedRMSMappingQuality = formattedValue(rms);
            return new VariantContextBuilder(vc)
                    .rmAttribute(getDeprecatedRawKeyName())   //some old GVCFs that were reblocked for gnomAD have both
                    .rmAttributes(getRawKeyNames())
                    .attribute(getKeyNames().get(0), finalizedRMSMappingQuality)
                    .make();
        }
    }

    private void parseRawDataString(ReducibleAnnotationData<List<Long>> myData) {
        myData.putAttribute(Allele.NO_CALL, parseRawDataString(myData.getRawData()));
    }

    //TODO once the AS annotations have been added genotype gvcfs this can be removed for a more generic approach
    private static List<Long> parseRawDataString(String rawDataString) {
        try {
            final String[] parsed = rawDataString.trim().replaceAll(AnnotationUtils.BRACKET_REGEX, "").split(", *");
            if (parsed.length != NUM_LIST_ENTRIES) {
                throw new UserException.BadInput("Raw value for annotation has " + parsed.length + " values, expected " + NUM_LIST_ENTRIES);
            }
            final long squareSum = Long.parseLong(parsed[SUM_OF_SQUARES_INDEX]);
            final long totalDP = Long.parseLong(parsed[TOTAL_DEPTH_INDEX]);
            return Arrays.asList(squareSum,totalDP);
        } catch (final NumberFormatException e) {
            throw new UserException.BadInput("malformed " + GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY + " annotation: " + rawDataString);
        }
    }

    //Maintain some semblance of backward compatibility by keeping the ability to use the old annotation key and format
    private static double parseDeprecatedRawDataString(String rawDataString) {
        try {
            /*
             * TODO: this is copied from gatk3 where it ignored all but the first value, we should figure out if this is
             * the right thing to do or if it should just convert the string without trying to split it and fail if
             * there is more than one value
             */
            final double squareSum = Double.parseDouble(rawDataString.split(",")[0]);
            return squareSum;
        } catch (final NumberFormatException e) {
            throw new UserException.BadInput("malformed " + getDeprecatedRawKeyName() + " annotation: " + rawDataString);
        }
    }

    /**
     *
     * @return the number of reads at the given site, calculated as InfoField {@link VCFConstants#DEPTH_KEY} minus the
     * format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site;
     * returns -1 (which yields NaN downstream) if {@link VCFConstants#DEPTH_KEY} is missing or the calculated depth is <= 0
     */
    @VisibleForTesting
    private static int getNumOfReads(final VariantContext vc) {
        if(vc.hasAttribute(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED)) {
            int mqDP = vc.getAttributeAsInt(GATKVCFConstants.MAPPING_QUALITY_DEPTH_DEPRECATED, 0);
            if (mqDP > 0) {
                return mqDP;
            }
        }

        //don't use the full depth because we don't calculate MQ for reference blocks
        //don't count spanning deletion calls towards number of reads
        int numOfReads = vc.getAttributeAsInt(VCFConstants.DEPTH_KEY, -1);
        if(vc.hasGenotypes()) {
            for(final Genotype gt : vc.getGenotypes()) {
                //reference and spanning-deletion genotypes contribute to site DP but not to MQ, so back their depth out
                if(hasReferenceDepth(gt) || hasSpanningDeletionAllele(gt)) {
                    //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                    if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                        numOfReads -= Integer.parseInt(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                    } else if (gt.hasDP()) {
                        numOfReads -= gt.getDP();
                    }
                }
            }
        }
        if (numOfReads <= 0){
            numOfReads = -1;  //return -1 to result in a NaN
        }
        return numOfReads;
    }

    //In the new reducible framework only samples that get annotated at the GVCF level contribute to MQ
    //The problem is that DP includes those samples plus the min_DP of the homRef blocks, which don't contribute MQ
    //The fix is to pull out reference blocks, whether or not they have a called GT, but don't subtract depth from PL=[0,0,0] sites because they're still "variant"
    //This is still inaccurate if there's an annotated homRef in the GVCF, which does happen for really low evidence alleles, but we won't know after the samples are merged
    private static boolean hasReferenceDepth(Genotype gt) {
        return gt.isHomRef() || (gt.isNoCall() && gt.hasPL() && gt.getPL()[0] == 0 && gt.getPL()[1] != 0);
    }

    /**
     *
     * @return the number of reads at the given site, trying first {@link GATKVCFConstants#RAW_MAPPING_QUALITY_WITH_DEPTH_KEY},
     * falling back to calculating the value as InfoField {@link VCFConstants#DEPTH_KEY} minus the
     * format field {@link GATKVCFConstants#MIN_DP_FORMAT_KEY} or DP of each of the HomRef genotypes at that site.
     * If neither of those is possible, will fall back to calculating the reads from the likelihoods data if provided;
     * returns -1 (which yields NaN downstream) if no depth source is available or the calculated depth is <= 0
     */
    @VisibleForTesting
    static long getNumOfReads(final VariantContext vc,
                              final AlleleLikelihoods<GATKRead, Allele> likelihoods) {
        if(vc.hasAttribute(GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY)) {
            List<Long> mqTuple = VariantContextGetters.getAttributeAsLongList(vc, GATKVCFConstants.RAW_MAPPING_QUALITY_WITH_DEPTH_KEY,0L);
            if (mqTuple.get(TOTAL_DEPTH_INDEX) > 0) {
                return mqTuple.get(TOTAL_DEPTH_INDEX);
            }
        }

        long numOfReads = 0;
        if (vc.hasAttribute(VCFConstants.DEPTH_KEY)) {
            numOfReads = VariantContextGetters.getAttributeAsLong(vc, VCFConstants.DEPTH_KEY, -1L);
            if(vc.hasGenotypes()) {
                for(final Genotype gt : vc.getGenotypes()) {
                    if(gt.isHomRef()) {
                        //site-level DP contribution will come from MIN_DP for gVCF-called reference variants or DP for BP resolution
                        if (gt.hasExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY)) {
                            numOfReads -= Long.parseLong(gt.getExtendedAttribute(GATKVCFConstants.MIN_DP_FORMAT_KEY).toString());
                        } else if (gt.hasDP()) {
                            numOfReads -= gt.getDP();
                        }
                    }
                }
            }

        // If there is no depth key, try to compute from the likelihoods
        } else if (likelihoods != null && likelihoods.numberOfAlleles() != 0) {
            for (int i = 0; i < likelihoods.numberOfSamples(); i++) {
                for (GATKRead read : likelihoods.sampleEvidence(i)) {
                    if (read.getMappingQuality() != QualityUtils.MAPPING_QUALITY_UNAVAILABLE) {
                        numOfReads++;
                    }
                }
            }
        }
        if (numOfReads <= 0) {
            numOfReads = -1;  //return -1 to result in a NaN
        }
        return numOfReads;
    }

    private static boolean hasSpanningDeletionAllele(final Genotype gt) {
        for(final Allele a : gt.getAlleles()) {
            if(GATKVCFConstants.isSpanningDeletion(a)) {
                return true;
            }
        }
        return false;
    }

    public static RMSMappingQuality getInstance() {
        return instance;
    }
}