1# This plots haplohseq posterior probability data for hg19 runs.
2# To support other genomes, the chromosome length definitions below
3# need to be altered.
4
5require("optparse")
6
7option_list = list(
8  make_option(c("-f", "--file"), type="character", default=NULL,
9              help="posterior output from haplohseq", metavar="character"),
10  make_option(c("-c", "--chromosome"), type="character", default="all",
11              help="chromosome to plot; all chromosomes by default", metavar="character"),
12  make_option(c("-o", "--out"), type="character", default=NULL,
13              help="output directory", metavar="character"),
14  make_option(c("-p", "--prefix"), type="character", default=NULL,
15              help="output file prefix", metavar="character")
16)
17
18opt_parser = OptionParser(option_list=option_list)
19opt = parse_args(opt_parser)
20posteriors = opt$file
21outputDir = opt$out
22outputPrefix = opt$prefix
23chromosome = opt$chromosome
24
25# hg19 chromosome lengths
26# http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/data/index.shtml
27chrLen = c()
28chrLen["1"] <- 249250621
29chrLen["2"] <- 243199373
30chrLen["3"] <- 198022430
31chrLen["4"] <- 191154276
32chrLen["5"] <- 180915260
33chrLen["6"] <- 171115067
34chrLen["7"] <- 159138663
35chrLen["8"] <- 146364022
36chrLen["9"] <- 141213431
37chrLen["10"] <- 135534747
38chrLen["11"] <- 135006516
39chrLen["12"] <- 133851895
40chrLen["13"] <- 115169878
41chrLen["14"] <- 107349540
42chrLen["15"] <- 102531392
43chrLen["16"] <- 90354753
44chrLen["17"] <- 81195210
45chrLen["18"] <- 78077248
46chrLen["19"] <- 59128983
47chrLen["20"] <- 63025520
48chrLen["21"] <- 48129895
49chrLen["22"] <- 51304566
50chrLen["X"] <- 155270560
51chrLen["Y"] <- 59373566
52
53# For a given chromosome, this calculates a genomic start and end (for a plot with chromosomes appended to each other).
54setPlotStartEnd <- function(chromosome) {
55	plotStart <- c()
56	plotEnd <- c()
57
58	if (chromosome == "all") {
59		plotStart["1"] <- 1
60		plotEnd["1"] <- plotStart["1"] + chrLen["1"] - 1
61
62		for (chr in 2:22) {
63			plotStart[as.character(chr)] <- plotEnd[as.character(chr-1)] + 1
64			plotEnd[as.character(chr)] <- plotStart[as.character(chr)] + chrLen[as.character(chr)] - 1
65		}
66		plotStart["X"] <- plotEnd["22"] + 1
67		plotEnd["X"] <- plotStart["X"] + chrLen["X"]
68		plotStart["Y"] <- plotEnd["X"] + 1
69		plotEnd["Y"] <- plotStart["Y"] + chrLen["Y"]
70
71		maxPos <- plotEnd["22"]
72
73	}
74	else {
75		plotStart[chromosome] <- 1
76		plotEnd[chromosome] <- chrLen[chromosome]
77		maxPos <- plotEnd[chromosome]
78	}
79
80	list("maxPos" = maxPos, "plotStart" = plotStart, "plotEnd" = plotEnd)
81}
82
83chrNum <- function(chrName) {
84	substring(as.character(chrName), if (nchar(as.character(chrName)) > 2 && substring(as.character(chrName),0,3) == 'chr') {4} else 1)
85}
86
87chrNums <- function(chrNames) {
88	sapply(chrNames, chrNum)
89}
90
91# given a list of chromosomes, this returns a list of start positions (for an aggregate plot)
92plotStarts <- function(chrNums) {
93	starts <- rep(0, length(chrNums))
94	for(i in 1:length(chrNums)) {
95		starts[i] <- plotStart[chrNums[i]]
96		}
97	return(starts)
98}
99
100# main plotting function
101plotHaplohseq <- function(chromosome, posteriorFile, plotStart, plotEnd, maxPos, title, title2) {
102	posteriorData <- read.table(posteriorFile, header=TRUE)
103	desc = tail(strsplit(posteriorFile,split="/")[[1]],n=1)
104	if (chromosome != "all") {
105		posteriorData <- posteriorData[chrNums(posteriorData$CHR)==chromosome,]
106	}
107
108	# plot RAFs
109	hap1RefFreqs <- posteriorData[posteriorData$HAP=="1",]
110	hap2RefFreqs <- posteriorData[posteriorData$HAP=="2",]
111	plot((plotStarts(chrNums(hap1RefFreqs$CHR)) + hap1RefFreqs$POS), hap1RefFreqs$RAF, ylim=c(0,1), xlim=c(0,maxPos), col="#0000FF88", xlab="", ylab="event prob", xaxt="n" , main=title, cex=0.4)
112	points((plotStarts(chrNums(hap2RefFreqs$CHR)) + hap2RefFreqs$POS), hap2RefFreqs$RAF, ylim=c(0,1), xlim=c(0,maxPos), col="#00FF0088", cex=0.4)
113
114	# plot posterior probabilities for up to 2 event states
115	lines(head((plotStarts(chrNums(posteriorData$CHR)) + posteriorData$POS), -1), head(posteriorData$S1, -1), col="#FF000088", lwd=3, cex=0.5)
116	lines(head((plotStarts(chrNums(posteriorData$CHR)) + posteriorData$POS), -1), head(posteriorData$S2, -1), col="#0000FF", lwd=3)
117
118	# add lines and text to demarcate chromosomes
119	currentGenomePosition = 0
120	abline(v=currentGenomePosition, lty=2, col="#AAAAAAFF")
121	if (chromosome == "all") {
122		for (chr in 1:22) {
123			previousGenomePosition = currentGenomePosition
124			abline(v=plotEnd[chr], lty=2, col="#AAAAAAFF")
125			currentGenomePosition <- plotEnd[chr] + 1
126			labelPosition = previousGenomePosition + (currentGenomePosition - previousGenomePosition)/2
127			text(c(labelPosition),c(0.02),cex=1, labels=c(paste0(chr)), font=2)
128		}
129	}
130}
131
132# initialize supporting variables
133params <- setPlotStartEnd(chromosome)
134maxPos <- params$maxPos
135plotStart <- params$plotStart
136plotEnd <- params$plotEnd
137
138# generate plot image
139suffix = ""
140if (chromosome != "all") {
141	suffix = paste("_chr",chromosome,sep="")
142}
143png(paste(outputDir,"/",outputPrefix,suffix,".png",sep=""),width=12,height=2,units="in",res=400)
144par(mar=c(0.5,1.75,1,1), oma=c(1.25,1,1,1), ps=8, mgp=c(1,0.2,0))
145layout(matrix(c(1,1),1,1,byrow=TRUE),widths=c(1),heights=c(1))
146plotHaplohseq(chromosome, posteriors, plotStart, plotEnd, maxPos, "", "")
147dev.off()
148cat(paste("haplohseq image generated: ", paste(outputDir,"/",outputPrefix,suffix,".png",sep=""), "\n", sep=""))
149
150
151