1#!/usr/bin/env python
2
3""" MultiQC module to parse output from SexdetErrmine """
4
5from __future__ import print_function
6from collections import OrderedDict
7import logging
8import json
9
10from multiqc.plots import bargraph, scatter
11from multiqc.modules.base_module import BaseMultiqcModule
12
13# Initialise the logger
14log = logging.getLogger(__name__)
15
16
17class MultiqcModule(BaseMultiqcModule):
18    """ SexDeterrmine module """
19
20    def __init__(self):
21
22        # Initialise the parent object
23        super(MultiqcModule, self).__init__(
24            name="SexDetErrmine",
25            anchor="sexdeterrmine",
26            href="https://github.com/TCLamnidis/Sex.DetERRmine",
27            info="""A python script to calculate the relative coverage of X and Y chromosomes,
28            and their associated error bars, from the depth of coverage at specified SNPs. """,
29        )
30
31        # Find and load any DeDup reports
32        self.sexdet_data = dict()
33
34        # Find and load JSON file
35        for f in self.find_log_files("sexdeterrmine", filehandles=True):
36            self.parse_data(f)
37
38        # Filter samples
39        self.sexdet_data = self.ignore_samples(self.sexdet_data)
40
41        # Return if no samples found
42        if len(self.sexdet_data) == 0:
43            raise UserWarning
44
45        # Save data output file
46        self.write_data_file(self.sexdet_data, "multiqc_sexdeter_metrics")
47
48        # Add to General Statistics
49        self.addSummaryMetrics()
50
51        # Plots
52        self.snp_rate_scatterplot()
53        self.read_count_barplot()
54        self.snp_count_barplot()
55
56    def parse_data(self, f):
57        try:
58            data = json.load(f["f"])
59        except Exception as e:
60            log.debug(e)
61            log.warning("Could not parse SexDeterrmine JSON: '{}'".format(f["fn"]))
62            return
63
64        # Parse JSON data to a dict
65        for s_name in data:
66            if s_name == "Metadata":
67                continue
68
69            s_clean = self.clean_s_name(s_name, f["root"])
70            if s_clean in self.sexdet_data:
71                log.debug("Duplicate sample name found! Overwriting: {}".format(s_clean))
72
73            self.add_data_source(f, s_clean)
74            self.sexdet_data[s_clean] = dict()
75
76            for k, v in data[s_name].items():
77                try:
78                    self.sexdet_data[s_clean][k] = float(v)
79                except ValueError:
80                    self.sexdet_data[s_clean][k] = v
81
82    def addSummaryMetrics(self):
83        """ Take the parsed stats from SexDetErrmine and add it to the main plot """
84
85        headers = OrderedDict()
86        headers["RateErrX"] = {
87            "title": "Err Rate X",
88            "description": "Rate of Error for Chr X",
89            "scale": "OrRd",
90            "hidden": True,
91            "shared_key": "snp_err_rate",
92        }
93        headers["RateErrY"] = {
94            "title": "Err Rate Y",
95            "description": "Rate of Error for Chr Y",
96            "scale": "OrRd",
97            "hidden": True,
98            "shared_key": "snp_err_rate",
99        }
100        headers["RateX"] = {
101            "title": "Rate X",
102            "description": "Number of positions on Chromosome X vs Autosomal positions.",
103            "scale": "PuBuGn",
104            "shared_key": "snp_count",
105        }
106        headers["RateY"] = {
107            "title": "Rate Y",
108            "description": "Number of positions on Chromosome Y vs Autosomal positions.",
109            "scale": "BuPu",
110            "shared_key": "snp_count",
111        }
112
113        self.general_stats_addcols(self.sexdet_data, headers)
114
115    def read_count_barplot(self):
116        """Make a bar plot showing read counts on Autosomal, X and Y chr"""
117        cats = OrderedDict()
118        cats["NR Aut"] = {"name": "Autosomal Reads"}
119        cats["NrX"] = {"name": "Reads on X"}
120        cats["NrY"] = {"name": "Reads on Y"}
121
122        config = {"id": "sexdeterrmine-readcounts-plot", "title": "SexDetErrmine: Read Counts", "ylab": "# Reads"}
123
124        self.add_section(
125            name="Read Counts",
126            anchor="sexdeterrmine-readcounts",
127            description="The number of reads covering positions on the autosomes, X and Y chromosomes.",
128            plot=bargraph.plot(self.sexdet_data, cats, config),
129        )
130
131    def snp_rate_scatterplot(self):
132        """Make a scatter plot showing relative coverage on X and Y chr"""
133        data = OrderedDict()
134        for sample in self.sexdet_data:
135            try:
136                data[sample] = {"x": self.sexdet_data[sample]["RateX"], "y": self.sexdet_data[sample]["RateY"]}
137            except KeyError:
138                pass
139
140        config = {
141            "id": "sexdeterrmine-rate-plot",
142            "title": "SexDetErrmine: Relative coverage",
143            "ylab": "Relative Cov. on Y",
144            "xlab": "Relative Cov. on X",
145        }
146
147        if len(data) > 0:
148            self.add_section(
149                name="Relative Coverage",
150                anchor="sexdeterrmine-rates",
151                description="The coverage on the X vs Y chromosome, relative to coverage on the Autosomes.",
152                helptext="""
153                Males are expected to have a roughly equal X- and Y-rates, while females are expected to have a Y-rate of 0 and an X-rate of 1.
154                Placement between the two clusters can be indicative of contamination, while placement with higher than expected X- and/or Y-rates can be indicative of sex chromosome aneuploidy.
155                """,
156                plot=scatter.plot(data, config),
157            )
158
159    def snp_count_barplot(self):
160        """Make a bar plot showing read counts on Autosomal, X and Y chr"""
161        cats = OrderedDict()
162        cats["Snps Autosomal"] = {"name": "Autosomal SNPs"}
163        cats["XSnps"] = {"name": "SNPs on X"}
164        cats["YSnps"] = {"name": "SNPs on Y"}
165
166        config = {"id": "sexdeterrmine-snps-plot", "title": "SexDetErrmine: SNP Counts", "ylab": "# Reads"}
167
168        self.add_section(
169            name="SNP Counts",
170            anchor="sexdeterrmine-snps",
171            description="Total number of SNP positions. When supplied with a BED file, this includes only positions specified there.",
172            plot=bargraph.plot(self.sexdet_data, cats, config),
173        )
174