1 /*
2  *  cluster.cpp
3  *
4  *
5  *  Created by Pat Schloss on 8/14/08.
6  *  Copyright 2008 Patrick D. Schloss. All rights reserved.
7  *
8  */
9 
10 #include "cluster.hpp"
11 #include "rabundvector.hpp"
12 #include "listvector.hpp"
13 
14 /***********************************************************************/
15 
Cluster(RAbundVector * rav,ListVector * lv,SparseDistanceMatrix * dm,float c,string f,float cs)16 Cluster::Cluster(RAbundVector* rav, ListVector* lv, SparseDistanceMatrix* dm, float c, string f, float cs) :
17 rabund(rav), list(lv), dMatrix(dm), method(f), adjust(cs)
18 {
19 	try {
20 
21         mapWanted = false;  //set to true by mgcluster to speed up overlap merge
22 
23         //save so you can modify as it changes in average neighbor
24         cutoff = c;
25         m = MothurOut::getInstance();
26 	}
27 	catch(exception& e) {
28 		m->errorOut(e, "Cluster", "Cluster");
29 		exit(1);
30 	}
31 }
32 /***********************************************************************/
clusterBins()33 void Cluster::clusterBins(){
34 	try {
35  		rabund->set(smallCol, rabund->get(smallRow)+rabund->get(smallCol));
36 		rabund->set(smallRow, 0);
37 		rabund->setLabel(toString(smallDist));
38 	}
39 	catch(exception& e) {
40 		m->errorOut(e, "Cluster", "clusterBins");
41 		exit(1);
42 	}
43 }
44 /***********************************************************************/
45 
clusterNames()46 void Cluster::clusterNames(){
47 	try {
48 		if (mapWanted) {  updateMap();  }
49 
50 		list->set(smallCol, list->get(smallRow)+','+list->get(smallCol));
51 		list->set(smallRow, "");
52 		list->setLabel(toString(smallDist));
53     }
54 	catch(exception& e) {
55 		m->errorOut(e, "Cluster", "clusterNames");
56 		exit(1);
57 	}
58 }
59 /***********************************************************************/
// Performs one merge step on the pair (smallRow, smallCol) that
// dMatrix->getSmallestCell() selects.
//
// Every distance stored for smallRow is either combined with the matching
// distance of smallCol through updateDistance(), or handled as "missing"
// (see the inner loop). The abundance and name bins are then merged with
// clusterBins()/clusterNames(), and finally the cells of smallCol that were
// never matched are processed and removed.
//
// cutOFF (in/out): for the "average" and "weighted" methods it is lowered to
//   the distance of any cell that is dropped without having been merged, when
//   that distance is smaller than the current value.
// Returns the result of the LAST updateDistance() call made during this step,
//   or false when no call was made.
//   NOTE(review): each call overwrites `changed`, so an earlier `true` can be
//   replaced by a later `false` - confirm that this is intended.
bool Cluster::update(double& cutOFF){
	try {
        // smallRow is handed to getSmallestCell and read right afterwards without
        // being assigned here - presumably it is an output argument (confirm in
        // SparseDistanceMatrix).
        smallCol = dMatrix->getSmallestCell(smallRow);
        nColCells = dMatrix->seqVec[smallCol].size();
        nRowCells = dMatrix->seqVec[smallRow].size();

		// foundCol[j] == 1 marks cell j of smallCol as already matched/updated
		vector<int> foundCol(nColCells, 0);

		int search;
		bool changed = false;

		for (int i=nRowCells-1;i>=0;i--) {  //matrix indexes sorted from largest to smallest, so start at smallest index
            // stop walking the row when the control-pressed flag is set; the code
            // after this loop still runs
            if (m->getControl_pressed()) { break; }

			//skip the cell that holds the smallRow<->smallCol distance itself
			if (dMatrix->seqVec[smallRow][i].index != smallCol) {
                // index of the third cluster whose distance to smallRow is stored in this cell
                search = dMatrix->seqVec[smallRow][i].index;

				bool merged = false;
				for (int j=0;j<nColCells;j++) {  //go through each distance the smallCol has looking for matching distance to find

					if (dMatrix->seqVec[smallCol][j].index != smallRow) {  //if you are not the smallest distance
						if (dMatrix->seqVec[smallCol][j].index == search) {  //we found a distance for the merge
							foundCol[j] = 1;
							merged = true;
							changed = updateDistance(dMatrix->seqVec[smallCol][j], dMatrix->seqVec[smallRow][i]);
                            // presumably copies the new distance to the partner's own cell - confirm in SparseDistanceMatrix
                            dMatrix->updateCellCompliment(smallCol, j);
							break;
						}else if (dMatrix->seqVec[smallCol][j].index < search) { //we don't have a distance for this cell (given the descending index order, no later cell can match)
                            if (!util.isEqual(adjust, -1)) { //adjust is set: use it as the missing column distance
                                merged = true;
                                PDistCell value(search, adjust); //create a distance for the missing value
                                // assumes addCellSorted returns the position at which the cell was inserted - confirm
                                int location = dMatrix->addCellSorted(smallCol, value);
                                changed = updateDistance(dMatrix->seqVec[smallCol][location], dMatrix->seqVec[smallRow][i]);
                                dMatrix->updateCellCompliment(smallCol, location);
                                nColCells++;
                                foundCol.push_back(0); //add a new found column
                                //shift the flags from `location` onward one slot to the right so they line up with the cells again
                                for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
                                foundCol[location] = 1;
                            }
                            j+=nColCells;  //jump out of loop and remove cell below
                        }
                    }
				}
				//an unmerged row distance is about to be dropped: for average/weighted, lower cutOFF to it if it is smaller
				if ((!merged) && (method == "average" || method == "weighted")) {   if (cutOFF > dMatrix->seqVec[smallRow][i].dist) {   cutOFF = dMatrix->seqVec[smallRow][i].dist; } }
                if ((method == "nearest") && (!merged)) { //you are a row dist without a column dist, add you as a column dist
                    PDistCell value(search, dMatrix->seqVec[smallRow][i].dist); //create a distance for the missing value
                    int location = dMatrix->addCellSorted(smallCol, value); nColCells++;
                    foundCol.push_back(0); //add a new found column
                    //shift the flags from `location` onward one slot to the right so they line up with the cells again
                    for (int k = foundCol.size()-1; k > location; k--) { foundCol[k] = foundCol[k-1]; }
                    foundCol[location] = 1;
                }
                // the row cell has been dealt with either way; remove it from smallRow
                dMatrix->rmCell(smallRow, i);
			}
		}
		// merge the abundance and name bins of smallRow into smallCol
		clusterBins();
		clusterNames();

        if (method == "nearest") {
            for (int i=nColCells-1;i>=0;i--) { //special case for nn: unfound dists mean above the cutoff -> keep smaller dist in col, so only the cell pointing back at smallRow is removed
                if (foundCol[i] == 0) {  //not found
                    if (dMatrix->seqVec[smallCol][i].index == smallRow) { //you are smallest distance
                        dMatrix->rmCell(smallCol, i);
                        break;
                    }
                }
            }
        }else {
            for (int i=nColCells-1;i>=0;i--) { //remove every unfound dist from the merged column (this includes the cell pointing back at smallRow)
                if (foundCol[i] == 0) {  //not found
                    if (!util.isEqual(adjust, -1)) { //adjust
                        PDistCell value(smallCol, adjust); //create a distance for the missing value
                        changed = updateDistance(dMatrix->seqVec[smallCol][i], value);
                        dMatrix->updateCellCompliment(smallCol, i);
                    }else {
                        if (method == "average" || method == "weighted") {
                            if (dMatrix->seqVec[smallCol][i].index != smallRow) { //if you are not the smallest distance
                                if (cutOFF > dMatrix->seqVec[smallCol][i].dist) {   cutOFF = dMatrix->seqVec[smallCol][i].dist;   }
                            }
                        }
                    }
                    // NOTE(review): the cell is removed even when it was just adjusted above - confirm this is intended
                    dMatrix->rmCell(smallCol, i);
                }
            }
        }
        //dMatrix->print();
        return changed;
	}
	catch(exception& e) {
		m->errorOut(e, "Cluster", "update");
		exit(1);
	}
}
156 /***********************************************************************/
setMapWanted(bool f)157 void Cluster::setMapWanted(bool f)  {
158 	try {
159 		mapWanted = f;
160 
161         //initialize map
162 		for (int k = 0; k < list->getNumBins(); k++) {
163 
164             string names = list->get(k);
165 
166             //parse bin
167             string individual = "";
168             int binNameslength = names.size();
169             for(int j=0;j<binNameslength;j++){
170                 if(names[j] == ','){
171                     seq2Bin[individual] = k;
172                     individual = "";
173                 }
174                 else{  individual += names[j];  }
175             }
176             //get last name
177             seq2Bin[individual] = k;
178 		}
179 
180 	}
181 	catch(exception& e) {
182 		m->errorOut(e, "Cluster", "setMapWanted");
183 		exit(1);
184 	}
185 }
186 /***********************************************************************/
updateMap()187 void Cluster::updateMap() {
188     try {
189 		//update location of seqs in smallRow since they move to smallCol now
190 		string names = list->get(smallRow);
191 
192         string individual = "";
193         int binNameslength = names.size();
194         for(int j=0;j<binNameslength;j++){
195             if(names[j] == ','){
196                 seq2Bin[individual] = smallCol;
197                 individual = "";
198             }
199             else{  individual += names[j];  }
200         }
201         //get last name
202         seq2Bin[individual] = smallCol;
203 
204 	}
205 	catch(exception& e) {
206 		m->errorOut(e, "Cluster", "updateMap");
207 		exit(1);
208 	}
209 }
210 /***********************************************************************/
211 
212 
213 
214