1# This plots haplohseq posterior probability data for hg19 runs. 2# To support other genomes, the chromosome length definitions below 3# need to be altered. 4 5require("optparse") 6 7option_list = list( 8 make_option(c("-f", "--file"), type="character", default=NULL, 9 help="posterior output from haplohseq", metavar="character"), 10 make_option(c("-c", "--chromosome"), type="character", default="all", 11 help="chromosome to plot; all chromosomes by default", metavar="character"), 12 make_option(c("-o", "--out"), type="character", default=NULL, 13 help="output directory", metavar="character"), 14 make_option(c("-p", "--prefix"), type="character", default=NULL, 15 help="output file prefix", metavar="character") 16) 17 18opt_parser = OptionParser(option_list=option_list) 19opt = parse_args(opt_parser) 20posteriors = opt$file 21outputDir = opt$out 22outputPrefix = opt$prefix 23chromosome = opt$chromosome 24 25# hg19 chromosome lengths 26# http://www.ncbi.nlm.nih.gov/projects/genome/assembly/grc/human/data/index.shtml 27chrLen = c() 28chrLen["1"] <- 249250621 29chrLen["2"] <- 243199373 30chrLen["3"] <- 198022430 31chrLen["4"] <- 191154276 32chrLen["5"] <- 180915260 33chrLen["6"] <- 171115067 34chrLen["7"] <- 159138663 35chrLen["8"] <- 146364022 36chrLen["9"] <- 141213431 37chrLen["10"] <- 135534747 38chrLen["11"] <- 135006516 39chrLen["12"] <- 133851895 40chrLen["13"] <- 115169878 41chrLen["14"] <- 107349540 42chrLen["15"] <- 102531392 43chrLen["16"] <- 90354753 44chrLen["17"] <- 81195210 45chrLen["18"] <- 78077248 46chrLen["19"] <- 59128983 47chrLen["20"] <- 63025520 48chrLen["21"] <- 48129895 49chrLen["22"] <- 51304566 50chrLen["X"] <- 155270560 51chrLen["Y"] <- 59373566 52 53# For a given chromosome, this calculates a genomic start and end (for a plot with chromosomes appended to each other). 54setPlotStartEnd <- function(chromosome) { 55 plotStart <- c() 56 plotEnd <- c() 57 58 if (chromosome == "all") { 59 plotStart["1"] <- 1 60 plotEnd["1"] <- plotStart["1"] + chrLen["1"] - 1 61 62 for (chr in 2:22) { 63 plotStart[as.character(chr)] <- plotEnd[as.character(chr-1)] + 1 64 plotEnd[as.character(chr)] <- plotStart[as.character(chr)] + chrLen[as.character(chr)] - 1 65 } 66 plotStart["X"] <- plotEnd["22"] + 1 67 plotEnd["X"] <- plotStart["X"] + chrLen["X"] 68 plotStart["Y"] <- plotEnd["X"] + 1 69 plotEnd["Y"] <- plotStart["Y"] + chrLen["Y"] 70 71 maxPos <- plotEnd["22"] 72 73 } 74 else { 75 plotStart[chromosome] <- 1 76 plotEnd[chromosome] <- chrLen[chromosome] 77 maxPos <- plotEnd[chromosome] 78 } 79 80 list("maxPos" = maxPos, "plotStart" = plotStart, "plotEnd" = plotEnd) 81} 82 83chrNum <- function(chrName) { 84 substring(as.character(chrName), if (nchar(as.character(chrName)) > 2 && substring(as.character(chrName),0,3) == 'chr') {4} else 1) 85} 86 87chrNums <- function(chrNames) { 88 sapply(chrNames, chrNum) 89} 90 91# given a list of chromosomes, this returns a list of start positions (for an aggregate plot) 92plotStarts <- function(chrNums) { 93 starts <- rep(0, length(chrNums)) 94 for(i in 1:length(chrNums)) { 95 starts[i] <- plotStart[chrNums[i]] 96 } 97 return(starts) 98} 99 100# main plotting function 101plotHaplohseq <- function(chromosome, posteriorFile, plotStart, plotEnd, maxPos, title, title2) { 102 posteriorData <- read.table(posteriorFile, header=TRUE) 103 desc = tail(strsplit(posteriorFile,split="/")[[1]],n=1) 104 if (chromosome != "all") { 105 posteriorData <- posteriorData[chrNums(posteriorData$CHR)==chromosome,] 106 } 107 108 # plot RAFs 109 hap1RefFreqs <- posteriorData[posteriorData$HAP=="1",] 110 hap2RefFreqs <- posteriorData[posteriorData$HAP=="2",] 111 plot((plotStarts(chrNums(hap1RefFreqs$CHR)) + hap1RefFreqs$POS), hap1RefFreqs$RAF, ylim=c(0,1), xlim=c(0,maxPos), col="#0000FF88", xlab="", ylab="event prob", xaxt="n" , main=title, cex=0.4) 112 points((plotStarts(chrNums(hap2RefFreqs$CHR)) + hap2RefFreqs$POS), hap2RefFreqs$RAF, ylim=c(0,1), xlim=c(0,maxPos), col="#00FF0088", cex=0.4) 113 114 # plot posterior probabilities for up to 2 event states 115 lines(head((plotStarts(chrNums(posteriorData$CHR)) + posteriorData$POS), -1), head(posteriorData$S1, -1), col="#FF000088", lwd=3, cex=0.5) 116 lines(head((plotStarts(chrNums(posteriorData$CHR)) + posteriorData$POS), -1), head(posteriorData$S2, -1), col="#0000FF", lwd=3) 117 118 # add lines and text to demarcate chromosomes 119 currentGenomePosition = 0 120 abline(v=currentGenomePosition, lty=2, col="#AAAAAAFF") 121 if (chromosome == "all") { 122 for (chr in 1:22) { 123 previousGenomePosition = currentGenomePosition 124 abline(v=plotEnd[chr], lty=2, col="#AAAAAAFF") 125 currentGenomePosition <- plotEnd[chr] + 1 126 labelPosition = previousGenomePosition + (currentGenomePosition - previousGenomePosition)/2 127 text(c(labelPosition),c(0.02),cex=1, labels=c(paste0(chr)), font=2) 128 } 129 } 130} 131 132# initialize supporting variables 133params <- setPlotStartEnd(chromosome) 134maxPos <- params$maxPos 135plotStart <- params$plotStart 136plotEnd <- params$plotEnd 137 138# generate plot image 139suffix = "" 140if (chromosome != "all") { 141 suffix = paste("_chr",chromosome,sep="") 142} 143png(paste(outputDir,"/",outputPrefix,suffix,".png",sep=""),width=12,height=2,units="in",res=400) 144par(mar=c(0.5,1.75,1,1), oma=c(1.25,1,1,1), ps=8, mgp=c(1,0.2,0)) 145layout(matrix(c(1,1),1,1,byrow=TRUE),widths=c(1),heights=c(1)) 146plotHaplohseq(chromosome, posteriors, plotStart, plotEnd, maxPos, "", "") 147dev.off() 148cat(paste("haplohseq image generated: ", paste(outputDir,"/",outputPrefix,suffix,".png",sep=""), "\n", sep="")) 149 150 151