1#!/usr/bin/env Rscript --vanilla --slave
2
3# get the input VCF tabular format, assert that sites must have AC > 0
4vcf <- subset(read.table(pipe('cat /dev/stdin'), header=T), AC > 0)
5
6tag <- commandArgs(TRUE)[1]
7
8tag.genotypes_alternate_count <- paste(tag, '.genotypes.alternate_count', sep='')
9tag.non_reference_discrepancy_count <- paste(tag, '.site.non_reference_discrepancy.count', sep='')
10tag.non_reference_discrepancy_normalizer <- paste(tag, '.site.non_reference_discrepancy.normalizer', sep='')
11tag.non_reference_sensitivity_count <- paste(tag, '.site.non_reference_sensitivity.count', sep='')
12tag.non_reference_sensitivity_normalizer <- paste(tag, '.site.non_reference_sensitivity.normalizer', sep='')
13tag.alternate_positive_discrepancy <- paste(tag, '.site.alternate_positive_discrepancy', sep='')
14tag.alternate_negative_discrepancy <- paste(tag, '.site.alternate_negative_discrepancy', sep='')
15tag.has_variant <- paste(tag, '.has_variant', sep='')
16
17vcf.numberOfSites <- length(vcf[, tag.genotypes_alternate_count])
18vcf.totalAltAlleles <- sum(vcf[, tag.genotypes_alternate_count])
19vcf.positiveDiscrepancy <- sum(vcf[, tag.alternate_positive_discrepancy]) / sum(vcf[, tag.genotypes_alternate_count])
20vcf.negativeDiscrepancy <- sum(vcf[, tag.alternate_negative_discrepancy]) / sum(vcf[, tag.genotypes_alternate_count])
21vcf.sitesTruePositive <- sum(vcf[, tag.has_variant]) / nrow(vcf)
22
23cat('number of sites', vcf.numberOfSites, '\n')
24cat('total alternate alleles', vcf.totalAltAlleles, '\n')
25cat('positive discrepancy', vcf.positiveDiscrepancy, '\n')
26cat('negative discrepancy', vcf.negativeDiscrepancy, '\n')
27
28x <- cbind(by(vcf, vcf$AC,
29    function(x) {
30        sum(x[, tag.alternate_positive_discrepancy]) / sum(x[, tag.genotypes_alternate_count])
31    }))
32
33byac <- data.frame(ac=as.numeric(rownames(x)), fdr=as.vector(x))
34
35print(byac)
36
37
38