1#!/usr/bin/env python 2 3""" MultiQC module to parse output from SexdetErrmine """ 4 5from __future__ import print_function 6from collections import OrderedDict 7import logging 8import json 9 10from multiqc.plots import bargraph, scatter 11from multiqc.modules.base_module import BaseMultiqcModule 12 13# Initialise the logger 14log = logging.getLogger(__name__) 15 16 17class MultiqcModule(BaseMultiqcModule): 18 """ SexDeterrmine module """ 19 20 def __init__(self): 21 22 # Initialise the parent object 23 super(MultiqcModule, self).__init__( 24 name="SexDetErrmine", 25 anchor="sexdeterrmine", 26 href="https://github.com/TCLamnidis/Sex.DetERRmine", 27 info="""A python script to calculate the relative coverage of X and Y chromosomes, 28 and their associated error bars, from the depth of coverage at specified SNPs. """, 29 ) 30 31 # Find and load any DeDup reports 32 self.sexdet_data = dict() 33 34 # Find and load JSON file 35 for f in self.find_log_files("sexdeterrmine", filehandles=True): 36 self.parse_data(f) 37 38 # Filter samples 39 self.sexdet_data = self.ignore_samples(self.sexdet_data) 40 41 # Return if no samples found 42 if len(self.sexdet_data) == 0: 43 raise UserWarning 44 45 # Save data output file 46 self.write_data_file(self.sexdet_data, "multiqc_sexdeter_metrics") 47 48 # Add to General Statistics 49 self.addSummaryMetrics() 50 51 # Plots 52 self.snp_rate_scatterplot() 53 self.read_count_barplot() 54 self.snp_count_barplot() 55 56 def parse_data(self, f): 57 try: 58 data = json.load(f["f"]) 59 except Exception as e: 60 log.debug(e) 61 log.warning("Could not parse SexDeterrmine JSON: '{}'".format(f["fn"])) 62 return 63 64 # Parse JSON data to a dict 65 for s_name in data: 66 if s_name == "Metadata": 67 continue 68 69 s_clean = self.clean_s_name(s_name, f["root"]) 70 if s_clean in self.sexdet_data: 71 log.debug("Duplicate sample name found! Overwriting: {}".format(s_clean)) 72 73 self.add_data_source(f, s_clean) 74 self.sexdet_data[s_clean] = dict() 75 76 for k, v in data[s_name].items(): 77 try: 78 self.sexdet_data[s_clean][k] = float(v) 79 except ValueError: 80 self.sexdet_data[s_clean][k] = v 81 82 def addSummaryMetrics(self): 83 """ Take the parsed stats from SexDetErrmine and add it to the main plot """ 84 85 headers = OrderedDict() 86 headers["RateErrX"] = { 87 "title": "Err Rate X", 88 "description": "Rate of Error for Chr X", 89 "scale": "OrRd", 90 "hidden": True, 91 "shared_key": "snp_err_rate", 92 } 93 headers["RateErrY"] = { 94 "title": "Err Rate Y", 95 "description": "Rate of Error for Chr Y", 96 "scale": "OrRd", 97 "hidden": True, 98 "shared_key": "snp_err_rate", 99 } 100 headers["RateX"] = { 101 "title": "Rate X", 102 "description": "Number of positions on Chromosome X vs Autosomal positions.", 103 "scale": "PuBuGn", 104 "shared_key": "snp_count", 105 } 106 headers["RateY"] = { 107 "title": "Rate Y", 108 "description": "Number of positions on Chromosome Y vs Autosomal positions.", 109 "scale": "BuPu", 110 "shared_key": "snp_count", 111 } 112 113 self.general_stats_addcols(self.sexdet_data, headers) 114 115 def read_count_barplot(self): 116 """Make a bar plot showing read counts on Autosomal, X and Y chr""" 117 cats = OrderedDict() 118 cats["NR Aut"] = {"name": "Autosomal Reads"} 119 cats["NrX"] = {"name": "Reads on X"} 120 cats["NrY"] = {"name": "Reads on Y"} 121 122 config = {"id": "sexdeterrmine-readcounts-plot", "title": "SexDetErrmine: Read Counts", "ylab": "# Reads"} 123 124 self.add_section( 125 name="Read Counts", 126 anchor="sexdeterrmine-readcounts", 127 description="The number of reads covering positions on the autosomes, X and Y chromosomes.", 128 plot=bargraph.plot(self.sexdet_data, cats, config), 129 ) 130 131 def snp_rate_scatterplot(self): 132 """Make a scatter plot showing relative coverage on X and Y chr""" 133 data = OrderedDict() 134 for sample in self.sexdet_data: 135 try: 136 data[sample] = {"x": self.sexdet_data[sample]["RateX"], "y": self.sexdet_data[sample]["RateY"]} 137 except KeyError: 138 pass 139 140 config = { 141 "id": "sexdeterrmine-rate-plot", 142 "title": "SexDetErrmine: Relative coverage", 143 "ylab": "Relative Cov. on Y", 144 "xlab": "Relative Cov. on X", 145 } 146 147 if len(data) > 0: 148 self.add_section( 149 name="Relative Coverage", 150 anchor="sexdeterrmine-rates", 151 description="The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.", 152 helptext=""" 153 Males are expected to have a roughly equal X- and Y-rates, while females are expected to have a Y-rate of 0 and an X-rate of 1. 154 Placement between the two clusters can be indicative of contamination, while placement with higher than expected X- and/or Y-rates can be indicative of sex chromosome aneuploidy. 155 """, 156 plot=scatter.plot(data, config), 157 ) 158 159 def snp_count_barplot(self): 160 """Make a bar plot showing read counts on Autosomal, X and Y chr""" 161 cats = OrderedDict() 162 cats["Snps Autosomal"] = {"name": "Autosomal SNPs"} 163 cats["XSnps"] = {"name": "SNPs on X"} 164 cats["YSnps"] = {"name": "SNPs on Y"} 165 166 config = {"id": "sexdeterrmine-snps-plot", "title": "SexDetErrmine: SNP Counts", "ylab": "# Reads"} 167 168 self.add_section( 169 name="SNP Counts", 170 anchor="sexdeterrmine-snps", 171 description="Total number of SNP positions. When supplied with a BED file, this includes only positions specified there.", 172 plot=bargraph.plot(self.sexdet_data, cats, config), 173 ) 174