1 /*
2 * cluster.cpp
3 *
4 *
5 * Created by Pat Schloss on 8/14/08.
6 * Copyright 2008 Patrick D. Schloss. All rights reserved.
7 *
8 */
9
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13
14 /***********************************************************************/
15
Cluster(RAbundVector * rav,ListVector * lv,SparseDistanceMatrix * dm,float c,string f,float cs)16 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
17 rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
18 {
19 try {
20
21 mapWanted = false; //set to true by mgcluster to speed up overlap merge
22
23 //save so you can modify as it changes in average neighbor
24 cutoff = c;
25 m = MothurOut::getInstance();
26 }
27 catch(exception& e) {
28 m->errorOut(e, "Cluster", "Cluster");
29 exit(1);
30 }
31 }
32 /***********************************************************************/
clusterBins()33 void Cluster::clusterBins(){
34 try {
35 rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
36 rabund->set(smallRow, 0);
37 rabund->setLabel(toString(smallDist));
38 }
39 catch(exception& e) {
40 m->errorOut(e, "Cluster", "clusterBins");
41 exit(1);
42 }
43 }
44 /***********************************************************************/
45
clusterNames()46 void Cluster::clusterNames(){
47 try {
48 if (mapWanted) { updateMap(); }
49
50 list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
51 list->set(smallRow, "");
52 list->setLabel(toString(smallDist));
53 }
54 catch(exception& e) {
55 m->errorOut(e, "Cluster", "clusterNames");
56 exit(1);
57 }
58 }
59 /***********************************************************************/
update(double & cutOFF)60 bool Cluster::update(double& cutOFF){
61 try {
62 smallCol = dMatrix->getSmallestCell(smallRow);
63 nColCells = dMatrix->seqVec[smallCol].size();
64 nRowCells = dMatrix->seqVec[smallRow].size();
65
66 vector<int> foundCol(nColCells, 0);
67
68 int search;
69 bool changed = false;
70
71 for (int i=nRowCells-1;i>=0;i--) { //matrix indexes sorted from largest to smallest, so start at smallest index
72 if (m->getControl_pressed()) { break; }
73
74 //if you are not the smallCell
75 if (dMatrix->seqVec[smallRow][i].index != smallCol) {
76 search = dMatrix->seqVec[smallRow][i].index;
77
78 bool merged = false;
79 for (int j=0;j<nColCells;j++) { //go through each distance the smallCol has looking for matching distance to find
80
81 if (dMatrix->seqVec[smallCol][j].index != smallRow) { //if you are not the smallest distance
82 if (dMatrix->seqVec[smallCol][j].index == search) { //we found a distance for the merge
83 foundCol[j] = 1;
84 merged = true;
85 changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
86 dMatrix->updateCellCompliment(smallCol, j);
87 break;
88 }else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell
89 if (!util.isEqual(adjust, -1)) { //adjust
90 merged = true;
91 PDistCell value(search, adjust); //create a distance for the missing value
92 int location = dMatrix->addCellSorted(smallCol, value);
93 changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
94 dMatrix->updateCellCompliment(smallCol, location);
95 nColCells++;
96 foundCol.push_back(0); //add a new found column
97 //adjust value
98 for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
99 foundCol[location] = 1;
100 }
101 j+=nColCells; //jump out of loop and remove cell below
102 }
103 }
104 }
105 //if not merged it you need it for warning
106 if ((!merged) && (method == "average" || method == "weighted")) { if (cutOFF > dMatrix->seqVec[smallRow][i].dist) { cutOFF = dMatrix->seqVec[smallRow][i].dist; } }
107 if ((method == "nearest") && (!merged)) { //you are a row dist without a column dist, add you as a column dist
108 PDistCell value(search, dMatrix->seqVec[smallRow][i].dist); //create a distance for the missing value
109 int location = dMatrix->addCellSorted(smallCol, value); nColCells++;
110 foundCol.push_back(0); //add a new found column
111 //adjust value
112 for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
113 foundCol[location] = 1;
114 }
115 dMatrix->rmCell(smallRow, i);
116 }
117 }
118 clusterBins();
119 clusterNames();
120
121 if (method == "nearest") {
122 for (int i=nColCells-1;i>=0;i--) { //remove any unfound dists from merged column, need special case for nn, since unfound dists mean above the cutoff -> keep smaller dist in col
123 if (foundCol[i] == 0) { //not found
124 if (dMatrix->seqVec[smallCol][i].index == smallRow) { //you are smallest distance
125 dMatrix->rmCell(smallCol, i);
126 break;
127 }
128 }
129 }
130 }else {
131 for (int i=nColCells-1;i>=0;i--) { //remove any unfound dists from merged column, need special case for nn, since unfound dists mean above the cutoff -> keep smaller dist in col
132 if (foundCol[i] == 0) { //not found
133 if (!util.isEqual(adjust, -1)) { //adjust
134 PDistCell value(smallCol, adjust); //create a distance for the missing value
135 changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
136 dMatrix->updateCellCompliment(smallCol, i);
137 }else {
138 if (method == "average" || method == "weighted") {
139 if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not hte smallest distance
140 if (cutOFF > dMatrix->seqVec[smallCol][i].dist) { cutOFF = dMatrix->seqVec[smallCol][i].dist; }
141 }
142 }
143 }
144 dMatrix->rmCell(smallCol, i);
145 }
146 }
147 }
148 //dMatrix->print();
149 return changed;
150 }
151 catch(exception& e) {
152 m->errorOut(e, "Cluster", "update");
153 exit(1);
154 }
155 }
156 /***********************************************************************/
setMapWanted(bool f)157 void Cluster::setMapWanted(bool f) {
158 try {
159 mapWanted = f;
160
161 //initialize map
162 for (int k = 0; k < list->getNumBins(); k++) {
163
164 string names = list->get(k);
165
166 //parse bin
167 string individual = "";
168 int binNameslength = names.size();
169 for(int j=0;j<binNameslength;j++){
170 if(names[j] == ','){
171 seq2Bin[individual] = k;
172 individual = "";
173 }
174 else{ individual += names[j]; }
175 }
176 //get last name
177 seq2Bin[individual] = k;
178 }
179
180 }
181 catch(exception& e) {
182 m->errorOut(e, "Cluster", "setMapWanted");
183 exit(1);
184 }
185 }
186 /***********************************************************************/
updateMap()187 void Cluster::updateMap() {
188 try {
189 //update location of seqs in smallRow since they move to smallCol now
190 string names = list->get(smallRow);
191
192 string individual = "";
193 int binNameslength = names.size();
194 for(int j=0;j<binNameslength;j++){
195 if(names[j] == ','){
196 seq2Bin[individual] = smallCol;
197 individual = "";
198 }
199 else{ individual += names[j]; }
200 }
201 //get last name
202 seq2Bin[individual] = smallCol;
203
204 }
205 catch(exception& e) {
206 m->errorOut(e, "Cluster", "updateMap");
207 exit(1);
208 }
209 }
210 /***********************************************************************/
211
212
213
214