1#!/usr/local/bin/python3.8 2# 3# Plot the output of "bcftools +guess-ploidy -v" 4# 5# Copyright (C) 2016-2017 Genome Research Ltd. 6# 7# Author: Petr Danecek <pd3@sanger.ac.uk> 8# 9# Permission is hereby granted, free of charge, to any person obtaining a copy 10# of this software and associated documentation files (the "Software"), to deal 11# in the Software without restriction, including without limitation the rights 12# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13# copies of the Software, and to permit persons to whom the Software is 14# furnished to do so, subject to the following conditions: 15# 16# The above copyright notice and this permission notice shall be included in 17# all copies or substantial portions of the Software. 18# 19# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25# DEALINGS IN THE SOFTWARE. 26 27from __future__ import print_function 28 29import matplotlib as mpl 30mpl.use('Agg') 31import matplotlib.pyplot as plt 32import matplotlib.gridspec as gridspec 33import random, math, sys 34import csv 35csv.register_dialect('tab', delimiter='\t', quoting=csv.QUOTE_NONE) 36 37if len(sys.argv) != 3: 38 print("""\ 39About: Plot output of "bcftools +guess-ploidy -v" 40Usage: guess-ploidy.py <guess-ploidy.out> <image-prefix>""", file = sys.stderr) 41 sys.exit() 42 43prefix = sys.argv[2] 44dpi = 150 45 46def add_value(dat,key,x,y): 47 if key not in dat: 48 dat[key] = [] 49 dat[key].append([x,y]) 50 51smpl2sex = {} 52dat = {} 53with open(sys.argv[1]) as f: 54 reader = csv.reader(f, 'tab') 55 for row in reader: 56 if row[0][0]=="#": continue 57 if row[0]=="SEX": 58 smpl = row[1] 59 sex = row[2] 60 phap = float(row[3]) 61 pdip = float(row[4]) 62 ndat = float(row[5]) 63 score = float(row[6]) 64 smpl2sex[smpl] = sex 65 add_value(dat,'score',smpl,score) 66 add_value(dat,'phap',smpl,phap) 67 add_value(dat,'pdip',smpl,pdip) 68 add_value(dat,'ndat',smpl,ndat) 69 70def sort_by_val(arr): 71 for x in (sorted(arr, key=lambda x:x[1])): 72 id = len(smpl2id) 73 smpl2id[x[0]] = id 74 arr = sorted(arr, key=lambda x:smpl2id[x[0]]) 75 return arr 76 77def select_sex(arr,sex): 78 out = [] 79 for x in arr: 80 if smpl2sex[x[0]]==sex: out.append(x) 81 return out 82 83col = {} 84col['blue'] = '#396ab1' 85col['orange'] = '#da7c30' 86col['green'] = '#3e9651' 87col['red'] = '#cc2529' 88col['grey'] = '#000000' 89col['purple'] = '#6b4c9a' 90col['yellow'] = '#ccc210' 91 92if True: 93 fig,ax1 = plt.subplots(1,1,figsize=(6,4)) 94 smpl2id = {} 95 dat['score'] = sort_by_val(dat['score']) 96 dat['scoreM'] = select_sex(dat['score'],'M') 97 dat['scoreF'] = select_sex(dat['score'],'F') 98 ax2 = ax1.twinx() 99 plots = ax2.plot([smpl2id[x[0]] for x in dat['ndat']],[x[1] for x in dat['ndat']],'v',color=col['grey'],ms=2,label='Number of sites') 100 plots += ax1.plot([smpl2id[x[0]] for x in dat['phap']],[x[1] for x in dat['phap']],'.',color=col['blue'],ms=3,label='log P(haploid)') 101 plots += ax1.plot([smpl2id[x[0]] for x in dat['pdip']],[x[1] for x in dat['pdip']],'.',color=col['yellow'],ms=3,label='log P(diploid)') 102 plots += ax1.plot([smpl2id[x[0]] for x in dat['scoreM']],[x[1] for x in dat['scoreM']],'.',color=col['green'],label='Total score: Males') 103 plots += ax1.plot([smpl2id[x[0]] for x in dat['scoreF']],[x[1] for x in dat['scoreF']],'.',color=col['red'],label='Total score: Females') 104 labels = [l.get_label() for l in plots] 105 ax1.legend(plots,labels,loc='best', frameon=False, numpoints=1, prop={'size':9}) 106 ax1.set_zorder(ax2.get_zorder()+1) 107 ax1.patch.set_visible(False) 108 ax1.set_xlabel('Sample') 109 ax1.set_ylabel('Score') 110 ax2.set_ylabel('Number of sites') 111 ax2.set_yscale('log') 112 # ax1.set_yscale('log') 113 ax1.ticklabel_format(style='sci', scilimits=(-3,4), axis='x') 114 plt.subplots_adjust(left=0.13,right=0.89,bottom=0.13,top=0.9,hspace=0.1) 115 plt.savefig(prefix+'.png',dpi=dpi) 116 117plt.close() 118 119 120 121