X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=clusterclassic.cpp;h=2d1b9a6b781960a824fddafc03b904e8039179e1;hb=f06b339c5fc4b6d1b9d2a08fe16bf7670bf7aeb4;hp=d0a63b1e2e4d47782839844595727d4420a9e873;hpb=26b30b0881a37665b18746d2851607c494e8ccc0;p=mothur.git diff --git a/clusterclassic.cpp b/clusterclassic.cpp index d0a63b1..2d1b9a6 100644 --- a/clusterclassic.cpp +++ b/clusterclassic.cpp @@ -11,15 +11,18 @@ #include "progress.hpp" /***********************************************************************/ -ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) { +ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist(1e6), nseqs(0), sim(s) { try { mapWanted = false; //set to true by mgcluster to speed up overlap merge //save so you can modify as it changes in average neighbor cutoff = c; - aboveCutoff = cutoff + 1.0; + aboveCutoff = cutoff + 10000.0; m = MothurOut::getInstance(); - globaldata = GlobalData::getInstance(); + if(method == "furthest") { tag = "fn"; } + else if (method == "average") { tag = "an"; } + else if (method == "weighted") { tag = "wn"; } + else if (method == "nearest") { tag = "nn"; } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "ClusterClassic"); @@ -37,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { ifstream fileHandle; m->openInputFile(filename, fileHandle); - fileHandle >> nseqs >> name; + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + matrixNames.push_back(name); @@ -102,14 +110,14 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { fileHandle >> distance; if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff){ + //if(distance < cutoff){ dMatrix[i][j] = distance; if (distance < smallDist) { smallDist = distance; } //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } - } + //} index++; reading->update(index); } @@ -124,9 +132,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff){ + //if(distance < cutoff){ if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); @@ -137,7 +145,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { //if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; } //if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; } - } + //} index++; reading->update(index); } @@ -162,9 +170,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff && j < i){ + if(j < i){ if (distance < smallDist) { smallDist = distance; } dMatrix[i][j] = distance; @@ -185,9 +193,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff && j < i){ + if(j < i){ if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); @@ -215,6 +223,8 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { rabund = new RAbundVector(list->getRAbundVector()); fileHandle.close(); + + return 0; } catch(exception& e) { m->errorOut(e, "ClusterClassic", "readPhylipFile"); @@ -226,7 +236,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { //sets smallCol and smallRow, returns distance double ClusterClassic::getSmallCell() { try { - + smallDist = aboveCutoff; smallRow = 1; smallCol = 0; @@ -279,6 +289,10 @@ void ClusterClassic::clusterBins(){ rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol)); rabund->set(smallCol, 0); + /*for (int i = smallCol+1; i < rabund->size(); i++) { + rabund->set((i-1), rabund->get(i)); + } + rabund->resize((rabund->size()-1));*/ rabund->setLabel(toString(smallDist)); // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl; @@ -296,6 +310,10 @@ void ClusterClassic::clusterNames(){ list->set(smallRow, list->get(smallRow)+','+list->get(smallCol)); list->set(smallCol, ""); + /*for (int i = smallCol+1; i < list->size(); i++) { + list->set((i-1), list->get(i)); + } + list->resize((list->size()-1));*/ list->setLabel(toString(smallDist)); // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl; @@ -308,103 +326,62 @@ void ClusterClassic::clusterNames(){ /***********************************************************************/ void ClusterClassic::update(double& cutOFF){ try { -//cout << "before update " << endl; //print(); getSmallCell(); int r, c; r = smallRow; c = smallCol; - //because we only store lt, we need to make sure we grab the right location - //if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value - //else { r = smallRow; c = smallCol; } //smallRow is the row value - - //reset rows smallest distance - //rowSmallDists[r].dist = aboveCutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0; - //rowSmallDists[c].dist = aboveCutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0; - - //if your rows smallest distance is from smallRow or smallCol, reset - //for(int i=0;i r) { distRow = dMatrix[i][r]; } else { distRow = dMatrix[r][i]; } - + if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell else { distCol = dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; } - + if(method == "furthest"){ newDist = max(distRow, distCol); } else if (method == "average"){ - if ((distRow == aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - }else if ((distRow == aboveCutoff) && (distCol != aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distCol) { cutOFF = distCol; } - }else if ((distRow != aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distRow) { cutOFF = distRow; } - }else { - int rowBin = rabund->get(r); - int colBin = rabund->get(c); - newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); - } + int rowBin = rabund->get(r); + int colBin = rabund->get(c); + newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); } else if (method == "weighted"){ - if ((distRow == aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - }else if ((distRow == aboveCutoff) && (distCol != aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distCol) { cutOFF = distCol; } - }else if ((distRow != aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distRow) { cutOFF = distRow; } - }else { - newDist = (distCol + distRow) / 2.0; - } + newDist = (distCol + distRow) / 2.0; } else if (method == "nearest"){ newDist = min(distRow, distCol); } - + //cout << "newDist = " << newDist << endl; if (i > r) { dMatrix[i][r] = newDist; } else { dMatrix[r][i] = newDist; } - //if (newDist < rowSmallDists[i].dist) { rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i; } } - //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl; } clusterBins(); clusterNames(); - - //find new small for 2 rows we just merged - //colDist temp(0,0,100.0); - //rowSmallDists[r] = temp; - - //for (int i = 0; i < dMatrix[r].size(); i++) { - // if (dMatrix[r][i] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[r][i]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; } - //} - //for (int i = dMatrix[r].size()+1; i < dMatrix.size(); i++) { - // if (dMatrix[i][dMatrix[r].size()] < rowSmallDists[r].dist) { rowSmallDists[r].dist = dMatrix[i][dMatrix[r].size()]; rowSmallDists[r].col = r; rowSmallDists[r].row = i; } - //} - //rowSmallDists[c] = temp; - //for (int i = 0; i < dMatrix[c].size(); i++) { - // if (dMatrix[c][i] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[c][i]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; } - //} - //for (int i = dMatrix[c].size()+1; i < dMatrix.size(); i++) { - // if (dMatrix[i][dMatrix[c].size()] < rowSmallDists[c].dist) { rowSmallDists[c].dist = dMatrix[i][dMatrix[c].size()]; rowSmallDists[c].col = c; rowSmallDists[c].row = i; } - //} + //resize each row + /*for(int i=0;ierrorOut(e, "ClusterClassic", "update"); @@ -466,11 +443,11 @@ void ClusterClassic::print() { try { //update location of seqs in smallRow since they move to smallCol now for (int i = 0; i < dMatrix.size(); i++) { - cout << "row = " << i << '\t'; + m->mothurOut("row = " + toString(i) + "\t"); for (int j = 0; j < dMatrix[i].size(); j++) { - cout << dMatrix[i][j] << '\t'; + m->mothurOut(toString(dMatrix[i][j]) + "\t"); } - cout << endl; + m->mothurOutEndLine(); } } catch(exception& e) {