1#!/usr/bin/env python2
2# encoding: utf-8
3"""
4cuffdiff_to_gct.py
5
6Created by Cole Trapnell on 2010-08-30.
7Copyright (c) 2010 Cole Trapnell. All rights reserved.
8"""
9
10import sys
11import getopt
12import math
13
14help_message = '''
15This script transforms a Cuffdiff FPKM tracking file to an IGV-compatible GCT expression file
16
17Usage:
18    cuffdiff_to_gct.py <input.fpkm_tracking> <output.gct>
19'''
20
21
22class Usage(Exception):
23    def __init__(self, msg):
24        self.msg = msg
25
26
27def main(argv=None):
28    if argv is None:
29        argv = sys.argv
30    try:
31        try:
32            opts, args = getopt.getopt(argv[1:], "ho:v", ["help", "output="])
33        except getopt.error, msg:
34            raise Usage(msg)
35
36        # option processing
37        for option, value in opts:
38            if option == "-v":
39                verbose = True
40            if option in ("-h", "--help"):
41                raise Usage(help_message)
42            if option in ("-o", "--output"):
43                output = value
44
45        if len(args) < 2:
46            raise Usage(help_message)
47
48        fpkm_tracking = open(args[0])
49        gct_out = open(args[1], "w")
50
51        num_descriptor_cols = 6
52        sample_names = []
53        while True:
54            h = fpkm_tracking.readline()
55            h = h.strip()
56            if h != "":
57                h_cols = h.split("\t")
58                if len(h_cols) < 12:
59                    print >> sys.stderr, "Error: malformed header"
60                    sys.exit(1)
61                num_samples = (len(h_cols) - num_descriptor_cols) / 3
62                for i in range(0, num_samples):
63                    FPKM_label = h_cols[num_descriptor_cols + (3 * i)]
64                    name_end = FPKM_label.rfind("_FPKM")
65                    name = FPKM_label[:name_end]
66                    sample_names.append(name)
67                break
68
69        #print sample_names
70        expr_records = []
71        for line in fpkm_tracking.readlines():
72            line = line.strip()
73            if line == "":
74                continue
75            cols = line.split("\t")
76            if len(cols) != 6 + (3 * num_samples):
77                print >> sys.stderr, "Error: malformed record"
78                sys.exit(1)
79            expr_strs = []
80            for i in range(0, num_samples):
81                FPKM_string = cols[num_descriptor_cols + (3 * i)]
82                FPKM = float(FPKM_string)
83                if math.isnan(FPKM) or math.isinf(FPKM):
84                    FPKM = 0.0
85                FPKM = str(FPKM)
86                expr_strs.append(FPKM)
87            rec = []
88            rec.append(cols[0])
89            desc = "na|@%s|" % cols[5]
90            rec.append(desc)
91            rec.extend(expr_strs)
92            expr_records.append(rec)
93
94        print >> gct_out, "#1.2"
95        print >> gct_out, "%d\t%d" %  (len(expr_records), num_samples)
96        print >> gct_out, "NAME\tDescription\t%s" % ("\t".join(sample_names))
97        for rec in expr_records:
98            print >> gct_out, "%s" % "\t".join(rec)
99
100
101    except Usage, err:
102        print >> sys.stderr, sys.argv[0].split("/")[-1] + ": " + str(err.msg)
103        print >> sys.stderr, "\t for help use --help"
104        return 2
105
106
107if __name__ == "__main__":
108    sys.exit(main())
109