1#!/usr/bin/env Rscript --vanilla --slave 2 3# get the input VCF tabular format, assert that sites must have AC > 0 4vcf <- subset(read.table(pipe('cat /dev/stdin'), header=T), AC > 0) 5 6tag <- commandArgs(TRUE)[1] 7 8tag.genotypes_alternate_count <- paste(tag, '.genotypes.alternate_count', sep='') 9tag.non_reference_discrepancy_count <- paste(tag, '.site.non_reference_discrepancy.count', sep='') 10tag.non_reference_discrepancy_normalizer <- paste(tag, '.site.non_reference_discrepancy.normalizer', sep='') 11tag.non_reference_sensitivity_count <- paste(tag, '.site.non_reference_sensitivity.count', sep='') 12tag.non_reference_sensitivity_normalizer <- paste(tag, '.site.non_reference_sensitivity.normalizer', sep='') 13tag.alternate_positive_discrepancy <- paste(tag, '.site.alternate_positive_discrepancy', sep='') 14tag.alternate_negative_discrepancy <- paste(tag, '.site.alternate_negative_discrepancy', sep='') 15tag.has_variant <- paste(tag, '.has_variant', sep='') 16 17vcf.numberOfSites <- length(vcf[, tag.genotypes_alternate_count]) 18vcf.totalAltAlleles <- sum(vcf[, tag.genotypes_alternate_count]) 19vcf.positiveDiscrepancy <- sum(vcf[, tag.alternate_positive_discrepancy]) / sum(vcf[, tag.genotypes_alternate_count]) 20vcf.negativeDiscrepancy <- sum(vcf[, tag.alternate_negative_discrepancy]) / sum(vcf[, tag.genotypes_alternate_count]) 21vcf.sitesTruePositive <- sum(vcf[, tag.has_variant]) / nrow(vcf) 22 23cat('number of sites', vcf.numberOfSites, '\n') 24cat('total alternate alleles', vcf.totalAltAlleles, '\n') 25cat('positive discrepancy', vcf.positiveDiscrepancy, '\n') 26cat('negative discrepancy', vcf.negativeDiscrepancy, '\n') 27 28x <- cbind(by(vcf, vcf$AC, 29 function(x) { 30 sum(x[, tag.alternate_positive_discrepancy]) / sum(x[, tag.genotypes_alternate_count]) 31 })) 32 33byac <- data.frame(ac=as.numeric(rownames(x)), fdr=as.vector(x)) 34 35print(byac) 36 37 38