1#!/usr/bin/env python
2
3""" MultiQC module to parse output from Flexbar """
4
5from __future__ import print_function
6from collections import OrderedDict
7import logging
8import re
9
10from multiqc.plots import bargraph
11from multiqc.modules.base_module import BaseMultiqcModule
12
13# Initialise the logger
14log = logging.getLogger(__name__)
15
16
class MultiqcModule(BaseMultiqcModule):
    """MultiQC module that collects Flexbar statistics from log files.

    Parsed values are stored per sample in ``self.flexbar_data``. The
    percentage of removed bases is added to the general statistics table and
    the read counts are shown in a bar plot.
    """

    # Patterns applied to every log line, compiled once for the whole class.
    # "output_filename" yields the sample name; every other key captures an
    # integer count that is stored under that key.
    _FLEXBAR_REGEXES = {
        "output_filename": re.compile(r"Read file:\s+(.+)$"),
        "processed_reads": re.compile(r"Processed reads\s+(\d+)"),
        "skipped_due_to_uncalled_bases": re.compile(r"skipped due to uncalled bases\s+(\d+)"),
        "short_prior_to_adapter_removal": re.compile(r"short prior to adapter removal\s+(\d+)"),
        "finally_skipped_short_reads": re.compile(r"finally skipped short reads\s+(\d+)"),
        "discarded_reads_overall": re.compile(r"Discarded reads overall\s+(\d+)"),
        "remaining_reads": re.compile(r"Remaining reads\s+(\d+)"),
        "processed_bases": re.compile(r"Processed bases:?\s+(\d+)"),
        "remaining_bases": re.compile(r"Remaining bases:?\s+(\d+)"),
    }

    def __init__(self):
        """Find and parse the Flexbar logs, then build the report content.

        Raises:
            UserWarning: if no sample data is left after parsing and
                filtering ignored sample names.
        """

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name="Flexbar",
            anchor="flexbar",
            href="https://github.com/seqan/flexbar",
            info="is a barcode and adapter removal tool.",
        )

        # Parse logs
        self.flexbar_data = dict()
        for f in self.find_log_files("flexbar", filehandles=True):
            self.parse_flexbar(f)

        # Filter to strip out ignored sample names
        self.flexbar_data = self.ignore_samples(self.flexbar_data)

        # NOTE(review): presumably the caller treats UserWarning as
        # "nothing found for this module" - confirm against the MultiQC core.
        if not self.flexbar_data:
            raise UserWarning

        log.info("Found {} logs".format(len(self.flexbar_data)))
        self.write_data_file(self.flexbar_data, "multiqc_flexbar")

        # Add drop rate to the general stats table
        headers = {}
        headers["removed_bases_pct"] = {
            "title": "% bp Trimmed",
            "description": "% Total Base Pairs removed",
            "max": 100,
            "min": 0,
            "suffix": "%",
            "scale": "YlOrRd",
        }
        self.general_stats_addcols(self.flexbar_data, headers)

        # Make barplot
        self.flexbar_barplot()

    def _save_flexbar_sample(self, s_name, parsed_data):
        """Store one parsed log section under ``s_name``.

        Does nothing when ``parsed_data`` is empty. When both base counts are
        present, ``removed_bases`` and ``removed_bases_pct`` are added to
        ``parsed_data`` before it is stored.
        """
        if not parsed_data:
            return

        # Calculate removed_bases
        if "processed_bases" in parsed_data and "remaining_bases" in parsed_data:
            processed = parsed_data["processed_bases"]
            removed = processed - parsed_data["remaining_bases"]
            parsed_data["removed_bases"] = removed
            if processed > 0:
                parsed_data["removed_bases_pct"] = (float(removed) / float(processed)) * 100.0
            else:
                # A run that processed no bases would otherwise raise
                # ZeroDivisionError; nothing was trimmed, so report 0%.
                parsed_data["removed_bases_pct"] = 0.0

        if s_name in self.flexbar_data:
            log.debug("Duplicate sample name found! Overwriting: {}".format(s_name))
        self.flexbar_data[s_name] = parsed_data

    def parse_flexbar(self, f):
        """Parse one log file, which may contain several Flexbar runs.

        Args:
            f: log file dict; this method reads ``f["f"]`` (iterated line by
                line), ``f["s_name"]`` (default sample name) and
                ``f["root"]`` (passed to ``clean_s_name``).

        Each section ending in a "Flexbar completed" line is saved as its own
        sample. The sample name comes from the "Read file:" line when one is
        present, otherwise from ``f["s_name"]``.
        """
        s_name = f["s_name"]
        parsed_data = dict()
        for line in f["f"]:
            for key, pattern in self._FLEXBAR_REGEXES.items():
                match = pattern.search(line)
                if not match:
                    continue
                if key == "output_filename":
                    s_name = self.clean_s_name(match.group(1), f["root"])
                else:
                    parsed_data[key] = int(match.group(1))

            # End of log output. Save and reset in case of more logs.
            if "Flexbar completed" in line:
                self._save_flexbar_sample(s_name, parsed_data)
                s_name = f["s_name"]
                parsed_data = dict()

        # Pick up any partial logs
        self._save_flexbar_sample(s_name, parsed_data)

    def flexbar_barplot(self):
        """ Make the HighCharts HTML to plot the flexbar rates """

        # Specify the order of the different possible categories
        keys = OrderedDict()
        keys["remaining_reads"] = {"color": "#437bb1", "name": "Remaining reads"}
        keys["skipped_due_to_uncalled_bases"] = {"color": "#e63491", "name": "Skipped due to uncalled bases"}
        keys["short_prior_to_adapter_removal"] = {"color": "#b1084c", "name": "Short prior to adapter removal"}
        keys["finally_skipped_short_reads"] = {"color": "#7f0000", "name": "Finally skipped short reads"}

        # Config for the plot
        pconfig = {
            "id": "flexbar_plot",
            "title": "Flexbar: Processed Reads",
            "ylab": "# Reads",
            "cpswitch_counts_label": "Number of Reads",
            "hide_zero_cats": False,
        }

        self.add_section(plot=bargraph.plot(self.flexbar_data, keys, pconfig))
121