#!/usr/bin/env python

""" MultiQC module to parse output from Flexbar """

from __future__ import print_function
from collections import OrderedDict
import logging
import re

from multiqc.plots import bargraph
from multiqc.modules.base_module import BaseMultiqcModule

# Initialise the logger
log = logging.getLogger(__name__)

# Patterns matched against every log line, compiled once at import time so
# they are not rebuilt for each file / line. "output_filename" captures the
# read file used to derive the sample name; every other key captures an
# integer counter.
_FLEXBAR_REGEXES = {
    "output_filename": re.compile(r"Read file:\s+(.+)$"),
    "processed_reads": re.compile(r"Processed reads\s+(\d+)"),
    "skipped_due_to_uncalled_bases": re.compile(r"skipped due to uncalled bases\s+(\d+)"),
    "short_prior_to_adapter_removal": re.compile(r"short prior to adapter removal\s+(\d+)"),
    "finally_skipped_short_reads": re.compile(r"finally skipped short reads\s+(\d+)"),
    "discarded_reads_overall": re.compile(r"Discarded reads overall\s+(\d+)"),
    "remaining_reads": re.compile(r"Remaining reads\s+(\d+)"),
    "processed_bases": re.compile(r"Processed bases:?\s+(\d+)"),
    "remaining_bases": re.compile(r"Remaining bases:?\s+(\d+)"),
}


class MultiqcModule(BaseMultiqcModule):
    """Collect Flexbar log statistics and expose them as a table column and bar plot."""

    def __init__(self):
        """Find and parse Flexbar logs, then register general stats and the plot.

        Raises:
            UserWarning: if no sample data remains after parsing and filtering.
        """

        # Initialise the parent object
        super(MultiqcModule, self).__init__(
            name="Flexbar",
            anchor="flexbar",
            href="https://github.com/seqan/flexbar",
            info="is a barcode and adapter removal tool.",
        )

        # Parse logs
        self.flexbar_data = dict()
        for f in self.find_log_files("flexbar", filehandles=True):
            self.parse_flexbar(f)

        # Filter to strip out ignored sample names
        self.flexbar_data = self.ignore_samples(self.flexbar_data)

        if not self.flexbar_data:
            raise UserWarning

        log.info("Found {} logs".format(len(self.flexbar_data)))
        self.write_data_file(self.flexbar_data, "multiqc_flexbar")

        # Add drop rate to the general stats table
        headers = {}
        headers["removed_bases_pct"] = {
            "title": "% bp Trimmed",
            "description": "% Total Base Pairs removed",
            "max": 100,
            "min": 0,
            "suffix": "%",
            "scale": "YlOrRd",
        }
        self.general_stats_addcols(self.flexbar_data, headers)

        # Make barplot
        self.flexbar_barplot()

    def _save_flexbar_sample(self, s_name, parsed_data):
        """Store one parsed log section in ``self.flexbar_data`` under ``s_name``.

        Does nothing when ``parsed_data`` is empty. When both base counters
        are present, ``removed_bases`` and ``removed_bases_pct`` are derived
        and added to ``parsed_data`` before it is stored.
        """
        if not parsed_data:
            return

        # Calculate removed_bases
        if "processed_bases" in parsed_data and "remaining_bases" in parsed_data:
            processed = parsed_data["processed_bases"]
            removed = processed - parsed_data["remaining_bases"]
            parsed_data["removed_bases"] = removed
            # A log reporting zero processed bases (e.g. empty input) must not
            # abort the module with ZeroDivisionError; nothing was removed, so
            # report 0%.
            if processed > 0:
                parsed_data["removed_bases_pct"] = (float(removed) / float(processed)) * 100.0
            else:
                parsed_data["removed_bases_pct"] = 0.0

        if s_name in self.flexbar_data:
            log.debug("Duplicate sample name found! Overwriting: {}".format(s_name))
        self.flexbar_data[s_name] = parsed_data

    def parse_flexbar(self, f):
        """Parse one Flexbar log file, which may hold several concatenated runs.

        Args:
            f: log-file dict as yielded by ``find_log_files``; the keys read
               here are ``"s_name"``, ``"root"`` and ``"f"`` (iterated line
               by line).

        Each run is stored when a line containing "Flexbar completed" is seen;
        whatever was collected after the last such line is stored at the end.
        """
        s_name = f["s_name"]
        parsed_data = dict()
        for line in f["f"]:
            for key, regex in _FLEXBAR_REGEXES.items():
                match = regex.search(line)
                if not match:
                    continue
                if key == "output_filename":
                    # Prefer the read file named inside the log over the log's own name
                    s_name = self.clean_s_name(match.group(1), f["root"])
                else:
                    parsed_data[key] = int(match.group(1))

            # End of log output. Save and reset in case of more logs.
            if "Flexbar completed" in line:
                self._save_flexbar_sample(s_name, parsed_data)
                s_name = f["s_name"]
                parsed_data = dict()

        # Pick up any partial logs
        self._save_flexbar_sample(s_name, parsed_data)

    def flexbar_barplot(self):
        """ Make the HighCharts HTML to plot the flexbar rates """

        # Specify the order of the different possible categories
        keys = OrderedDict()
        keys["remaining_reads"] = {"color": "#437bb1", "name": "Remaining reads"}
        keys["skipped_due_to_uncalled_bases"] = {"color": "#e63491", "name": "Skipped due to uncalled bases"}
        keys["short_prior_to_adapter_removal"] = {"color": "#b1084c", "name": "Short prior to adapter removal"}
        keys["finally_skipped_short_reads"] = {"color": "#7f0000", "name": "Finally skipped short reads"}

        # Config for the plot
        pconfig = {
            "id": "flexbar_plot",
            "title": "Flexbar: Processed Reads",
            "ylab": "# Reads",
            "cpswitch_counts_label": "Number of Reads",
            "hide_zero_cats": False,
        }

        self.add_section(plot=bargraph.plot(self.flexbar_data, keys, pconfig))