5 * Created by westcott on 10/29/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "clusterclassic.h"
11 #include "progress.hpp"
13 /***********************************************************************/
14 ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist(1e6), nseqs(0), sim(s) {
16 mapWanted = false; //set to true by mgcluster to speed up overlap merge
18 //save so you can modify as it changes in average neighbor
20 aboveCutoff = cutoff + 10000.0;
21 m = MothurOut::getInstance();
22 if(method == "furthest") { tag = "fn"; }
23 else if (method == "average") { tag = "an"; }
24 else if (method == "weighted") { tag = "wn"; }
25 else if (method == "nearest") { tag = "nn"; }
28 m->errorOut(e, "ClusterClassic", "ClusterClassic");
32 /***********************************************************************/
33 int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
38 vector<string> matrixNames;
41 m->openInputFile(filename, fileHandle);
43 fileHandle >> nseqs >> name;
45 matrixNames.push_back(name);
48 list = new ListVector(nseqs);
52 list = new ListVector(nameMap->getListVector());
53 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
56 //initialize distance matrix to cutoff
57 dMatrix.resize(nseqs);
58 //colDist temp(0, 0, aboveCutoff);
59 //rowSmallDists.resize(nseqs, temp);
60 for (int i = 1; i < nseqs; i++) {
61 dMatrix[i].resize(i, aboveCutoff);
66 while((d=fileHandle.get()) != EOF){
70 fileHandle.putback(d);
71 for(int i=0;i<nseqs;i++){
72 fileHandle >> distance;
86 reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2);
90 for(int i=1;i<nseqs;i++){
91 if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
94 matrixNames.push_back(name);
97 //there's A LOT of repeated code throughout this method...
101 for(int j=0;j<i;j++){
103 if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
105 fileHandle >> distance;
107 if (distance == -1) { distance = 1000000; }
108 else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
110 //if(distance < cutoff){
111 dMatrix[i][j] = distance;
112 if (distance < smallDist) { smallDist = distance; }
113 //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
114 //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
117 reading->update(index);
122 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
124 for(int j=0;j<i;j++){
125 fileHandle >> distance;
127 if (m->control_pressed) { delete reading; fileHandle.close(); return 0; }
129 if (distance == -1) { distance = 1000000; }
130 else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
132 //if(distance < cutoff){
133 if (distance < smallDist) { smallDist = distance; }
135 int row = nameMap->get(matrixNames[i]);
136 int col = nameMap->get(matrixNames[j]);
138 if (row < col) { dMatrix[col][row] = distance; }
139 else { dMatrix[row][col] = distance; }
141 //if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
142 //if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
145 reading->update(index);
152 reading = new Progress("Reading matrix: ", nseqs * nseqs);
156 for(int i=1;i<nseqs;i++){
158 matrixNames.push_back(name);
162 for(int j=0;j<nseqs;j++){
163 fileHandle >> distance;
165 if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
167 if (distance == -1) { distance = 1000000; }
168 else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
171 if (distance < smallDist) { smallDist = distance; }
173 dMatrix[i][j] = distance;
174 //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; }
175 //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; }
178 reading->update(index);
183 if(nameMap->count(name)==0){ m->mothurOut("Error: Sequence '" + name + "' was not found in the names file, please correct"); m->mothurOutEndLine(); }
185 for(int j=0;j<nseqs;j++){
186 fileHandle >> distance;
188 if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
190 if (distance == -1) { distance = 1000000; }
191 else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert.
194 if (distance < smallDist) { smallDist = distance; }
196 int row = nameMap->get(matrixNames[i]);
197 int col = nameMap->get(matrixNames[j]);
199 if (row < col) { dMatrix[col][row] = distance; }
200 else { dMatrix[row][col] = distance; }
202 //if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; }
203 //if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; }
206 reading->update(index);
212 if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
218 rabund = new RAbundVector(list->getRAbundVector());
224 catch(exception& e) {
225 m->errorOut(e, "ClusterClassic", "readPhylipFile");
230 /***********************************************************************/
231 //sets smallCol and smallRow, returns distance
232 double ClusterClassic::getSmallCell() {
235 smallDist = aboveCutoff;
239 vector<colDist> mins;
241 for(int i=1;i<nseqs;i++){
242 for(int j=0;j<i;j++){
243 if (dMatrix[i][j] < smallDist) {
245 colDist temp(i, j, dMatrix[i][j]);
246 mins.push_back(temp);
247 smallDist = dMatrix[i][j];
248 }else if (dMatrix[i][j] == smallDist) {
249 colDist temp(i, j, dMatrix[i][j]);
250 mins.push_back(temp);
256 if (mins.size() > 0) {
258 if (mins.size() > 1) {
259 //pick random number between 0 and mins.size()
260 zrand = (int)((float)(rand()) / (RAND_MAX / (mins.size()-1) + 1));
263 smallRow = mins[zrand].row;
264 smallCol = mins[zrand].col;
267 //cout << smallRow << '\t' << smallCol << '\t' << smallDist << endl;
268 //eliminate smallCell
269 if (smallRow < smallCol) { dMatrix[smallCol][smallRow] = aboveCutoff; }
270 else { dMatrix[smallRow][smallCol] = aboveCutoff; }
275 catch(exception& e) {
276 m->errorOut(e, "ClusterClassic", "getSmallCell");
280 /***********************************************************************/
281 void ClusterClassic::clusterBins(){
283 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol);
285 rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol));
286 rabund->set(smallCol, 0);
287 /*for (int i = smallCol+1; i < rabund->size(); i++) {
288 rabund->set((i-1), rabund->get(i));
290 rabund->resize((rabund->size()-1));*/
291 rabund->setLabel(toString(smallDist));
293 // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl;
295 catch(exception& e) {
296 m->errorOut(e, "ClusterClassic", "clusterBins");
300 /***********************************************************************/
301 void ClusterClassic::clusterNames(){
303 // cout << smallCol << '\t' << smallRow << '\t' << smallDist << '\t' << list->get(smallRow) << '\t' << list->get(smallCol);
304 if (mapWanted) { updateMap(); }
306 list->set(smallRow, list->get(smallRow)+','+list->get(smallCol));
307 list->set(smallCol, "");
308 /*for (int i = smallCol+1; i < list->size(); i++) {
309 list->set((i-1), list->get(i));
311 list->resize((list->size()-1));*/
312 list->setLabel(toString(smallDist));
314 // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl;
316 catch(exception& e) {
317 m->errorOut(e, "ClusterClassic", "clusterNames");
321 /***********************************************************************/
322 void ClusterClassic::update(double& cutOFF){
328 r = smallRow; c = smallCol;
330 for(int i=0;i<nseqs;i++){
331 if(i != r && i != c){
332 double distRow, distCol, newDist;
333 if (i > r) { distRow = dMatrix[i][r]; }
334 else { distRow = dMatrix[r][i]; }
336 if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell
337 else { distCol = dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; }
339 if(method == "furthest"){
340 newDist = max(distRow, distCol);
342 else if (method == "average"){
343 int rowBin = rabund->get(r);
344 int colBin = rabund->get(c);
345 newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin);
347 else if (method == "weighted"){
348 newDist = (distCol + distRow) / 2.0;
350 else if (method == "nearest"){
351 newDist = min(distRow, distCol);
353 //cout << "newDist = " << newDist << endl;
354 if (i > r) { dMatrix[i][r] = newDist; }
355 else { dMatrix[r][i] = newDist; }
364 /*for(int i=0;i<nseqs;i++){
365 for(int j=c+1;j<dMatrix[i].size();j++){
366 dMatrix[i][j-1]=dMatrix[i][j];
371 for(int i=c+1;i<nseqs;i++){
372 for(int j=0;j < dMatrix[i-1].size();j++){
373 dMatrix[i-1][j]=dMatrix[i][j];
378 dMatrix.pop_back();*/
381 catch(exception& e) {
382 m->errorOut(e, "ClusterClassic", "update");
386 /***********************************************************************/
387 void ClusterClassic::setMapWanted(bool f) {
392 for (int i = 0; i < list->getNumBins(); i++) {
395 string names = list->get(i);
396 while (names.find_first_of(',') != -1) {
398 string name = names.substr(0,names.find_first_of(','));
399 //save name and bin number
401 names = names.substr(names.find_first_of(',')+1, names.length());
409 catch(exception& e) {
410 m->errorOut(e, "ClusterClassic", "setMapWanted");
414 /***********************************************************************/
415 void ClusterClassic::updateMap() {
417 //update location of seqs in smallRow since they move to smallCol now
418 string names = list->get(smallRow);
419 while (names.find_first_of(',') != -1) {
421 string name = names.substr(0,names.find_first_of(','));
422 //save name and bin number
423 seq2Bin[name] = smallCol;
424 names = names.substr(names.find_first_of(',')+1, names.length());
428 seq2Bin[names] = smallCol;
431 catch(exception& e) {
432 m->errorOut(e, "ClusterClassic", "updateMap");
436 /***********************************************************************/
437 void ClusterClassic::print() {
439 //update location of seqs in smallRow since they move to smallCol now
440 for (int i = 0; i < dMatrix.size(); i++) {
441 m->mothurOut("row = " + toString(i) + "\t");
442 for (int j = 0; j < dMatrix[i].size(); j++) {
443 m->mothurOut(toString(dMatrix[i][j]) + "\t");
445 m->mothurOutEndLine();
448 catch(exception& e) {
449 m->errorOut(e, "ClusterClassic", "updateMap");
453 /***********************************************************************/