1 #ifndef CLUSTERING_H
2 #define CLUSTERING_H
3 /*
4  *  abundances.h
5  *  cufflinks
6  *
7  *  Created by Cole Trapnell on 4/27/09.
8  *  Copyright 2009 Cole Trapnell. All rights reserved.
9  *
10  */
11 
12 #ifdef HAVE_CONFIG_H
13 #include <config.h>
14 #endif
15 
16 // DON'T move this, or mystery compiler errors will result. Affects gcc >= 4.1
17 #include <boost/graph/vector_as_graph.hpp>
18 
19 #include <boost/graph/adjacency_list.hpp>
20 #include <boost/graph/depth_first_search.hpp>
21 #include <boost/graph/visitors.hpp>
22 #include <boost/graph/graph_traits.hpp>
23 #include <boost/graph/connected_components.hpp>
24 
25 #ifdef DEBUG
26 #include <boost/numeric/ublas/io.hpp>
27 #endif
28 
29 #include <boost/version.hpp>
30 
31 #if (BOOST_VERSION < 103800)
32 #include <boost/vector_property_map.hpp>
33 #else
34 #include <boost/property_map/vector_property_map.hpp>
35 #endif
36 
37 
38 #include "abundances.h"
39 
40 typedef boost::adjacency_list <boost::vecS, boost::vecS, boost::undirectedS> AbundanceGraph;
41 
42 struct ConnectByExonOverlap
43 {
44 	void operator()(const AbundanceGroup& cluster,
45 					AbundanceGraph& G);
46 };
47 
48 struct ConnectByAnnotatedGeneId
49 {
50 	void operator()(const AbundanceGroup& cluster,
51 					AbundanceGraph& G);
52 };
53 
54 struct ConnectByAnnotatedTssId
55 {
56 	void operator()(const AbundanceGroup& cluster,
57 					AbundanceGraph& G);
58 };
59 
60 struct ConnectByAnnotatedProteinId
61 {
62 	void operator()(const AbundanceGroup& cluster,
63 					AbundanceGraph& G);
64 };
65 
66 struct ConnectByStrand
67 {
68 	void operator()(const AbundanceGroup& cluster,
69 					AbundanceGraph& G);
70 };
71 
72 // A "transcript cluster is a set of transcripts whose projections into the
73 // genome overlap on the same strand.  They may thus share fragment alignments,
74 // and so they need to be quantitated together.  After quantitation, they
75 // can be picked apart.
76 template<class cluster_policy>
77 void cluster_transcripts(const AbundanceGroup& transfrags,
78 						 vector<AbundanceGroup>& transfrags_by_cluster,
79 						 ublas::matrix<double>* new_gamma = NULL,
80                          ublas::matrix<double>* new_iterated_count = NULL,
81                          ublas::matrix<double>* new_count = NULL,
82                          ublas::matrix<double>* new_fpkm = NULL)
83 {
84 	boost::adjacency_list <boost::vecS, boost::vecS, boost::undirectedS> G;
85 
86 	transfrags_by_cluster.clear();
87 
88 	cluster_policy cp;
89 
90 	cp(transfrags, G);
91 
92 	std::vector<int> component(num_vertices(G));
93 	connected_components(G, &component[0]);
94 
95 	vector<vector<bool> > clusters(transfrags.abundances().size(),
96 								   vector<bool>(transfrags.abundances().size(), false));
97 
98 	vector<vector<size_t> > cluster_indices(transfrags.abundances().size());
99 	for (size_t i = 0; i < transfrags.abundances().size(); ++i)
100 	{
101         clusters[component[i]][i] = true;
102         cluster_indices[component[i]].push_back(i);
103 	}
104 	for (size_t i = 0; i < cluster_indices.size(); ++i)
105 	{
106 		if (cluster_indices[i].empty())
107 		{
108 			cluster_indices.resize(i);
109 			break;
110 		}
111 	}
112 
113 	for (size_t i = 0; i < clusters.size(); ++i)
114 	{
115 		AbundanceGroup cluster;
116 		transfrags.filter_group(clusters[i], cluster);
117 		if (!cluster.abundances().empty())
118 			transfrags_by_cluster.push_back(cluster);
119 	}
120 
121 	if (new_gamma != NULL)
122 	{
123 		const ublas::matrix<double>& trans_gamma_cov = transfrags.gamma_cov();
124         const ublas::matrix<double>& trans_iterated_count_cov = transfrags.iterated_count_cov();
125         const ublas::matrix<double>& trans_count_cov = transfrags.count_cov();
126         const ublas::matrix<double>& trans_fpkm_cov = transfrags.fpkm_cov();
127 
128 		ublas::matrix<double>& cov = *new_gamma;
129         ublas::matrix<double>& iterated_count_cov = *new_iterated_count;
130         ublas::matrix<double>& count_cov = *new_count;
131         ublas::matrix<double>& fpkm_cov = *new_fpkm;
132 
133 		// number of primary transcripts for this gene
134 		size_t num_pt = cluster_indices.size();
135 		cov = ublas::zero_matrix<double>(num_pt, num_pt);
136 
137         count_cov = ublas::zero_matrix<double>(num_pt, num_pt);
138         iterated_count_cov = ublas::zero_matrix<double>(num_pt, num_pt);
139         fpkm_cov = ublas::zero_matrix<double>(num_pt, num_pt);
140 
141 		//cerr << "combined " << combined << endl;
142 
143 		//cerr << "locus isoform gamma cov" << gamma_cov << endl;
144 		for (size_t L = 0; L < cluster_indices.size(); ++L)
145 		{
146 			const vector<size_t>& L_isos = cluster_indices[L];
147 			for (size_t K = 0; K < cluster_indices.size(); ++K)
148 			{
149 				const vector<size_t>& K_isos = cluster_indices[K];
150 				for (size_t l = 0; l < L_isos.size(); ++l)
151 				{
152 					for (size_t k = 0; k < K_isos.size(); ++k)
153 					{
154 						cov(L,K) += trans_gamma_cov(L_isos[l],K_isos[k]);
155                         count_cov(L,K) += trans_count_cov(L_isos[l],K_isos[k]);
156                         iterated_count_cov(L,K) += trans_iterated_count_cov(L_isos[l],K_isos[k]);
157                         fpkm_cov(L,K) += trans_fpkm_cov(L_isos[l],K_isos[k]);
158 					}
159 				}
160 			}
161 		}
162 	}
163 }
164 
165 #endif
166 
167