1#!/usr/bin/env python2 2# encoding: utf-8 3""" 4cuffdiff_to_gct.py 5 6Created by Cole Trapnell on 2010-08-30. 7Copyright (c) 2010 Cole Trapnell. All rights reserved. 8""" 9 10import sys 11import getopt 12import math 13 14help_message = ''' 15This script transforms a Cuffdiff FPKM tracking file to an IGV-compatible GCT expression file 16 17Usage: 18 cuffdiff_to_gct.py <input.fpkm_tracking> <output.gct> 19''' 20 21 22class Usage(Exception): 23 def __init__(self, msg): 24 self.msg = msg 25 26 27def main(argv=None): 28 if argv is None: 29 argv = sys.argv 30 try: 31 try: 32 opts, args = getopt.getopt(argv[1:], "ho:v", ["help", "output="]) 33 except getopt.error, msg: 34 raise Usage(msg) 35 36 # option processing 37 for option, value in opts: 38 if option == "-v": 39 verbose = True 40 if option in ("-h", "--help"): 41 raise Usage(help_message) 42 if option in ("-o", "--output"): 43 output = value 44 45 if len(args) < 2: 46 raise Usage(help_message) 47 48 fpkm_tracking = open(args[0]) 49 gct_out = open(args[1], "w") 50 51 num_descriptor_cols = 6 52 sample_names = [] 53 while True: 54 h = fpkm_tracking.readline() 55 h = h.strip() 56 if h != "": 57 h_cols = h.split("\t") 58 if len(h_cols) < 12: 59 print >> sys.stderr, "Error: malformed header" 60 sys.exit(1) 61 num_samples = (len(h_cols) - num_descriptor_cols) / 3 62 for i in range(0, num_samples): 63 FPKM_label = h_cols[num_descriptor_cols + (3 * i)] 64 name_end = FPKM_label.rfind("_FPKM") 65 name = FPKM_label[:name_end] 66 sample_names.append(name) 67 break 68 69 #print sample_names 70 expr_records = [] 71 for line in fpkm_tracking.readlines(): 72 line = line.strip() 73 if line == "": 74 continue 75 cols = line.split("\t") 76 if len(cols) != 6 + (3 * num_samples): 77 print >> sys.stderr, "Error: malformed record" 78 sys.exit(1) 79 expr_strs = [] 80 for i in range(0, num_samples): 81 FPKM_string = cols[num_descriptor_cols + (3 * i)] 82 FPKM = float(FPKM_string) 83 if math.isnan(FPKM) or math.isinf(FPKM): 84 FPKM = 0.0 85 FPKM = str(FPKM) 86 expr_strs.append(FPKM) 87 rec = [] 88 rec.append(cols[0]) 89 desc = "na|@%s|" % cols[5] 90 rec.append(desc) 91 rec.extend(expr_strs) 92 expr_records.append(rec) 93 94 print >> gct_out, "#1.2" 95 print >> gct_out, "%d\t%d" % (len(expr_records), num_samples) 96 print >> gct_out, "NAME\tDescription\t%s" % ("\t".join(sample_names)) 97 for rec in expr_records: 98 print >> gct_out, "%s" % "\t".join(rec) 99 100 101 except Usage, err: 102 print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg) 103 print >> sys.stderr, "\t for help use --help" 104 return 2 105 106 107if __name__ == "__main__": 108 sys.exit(main()) 109