X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=clusterclassic.cpp;h=32a9341613d07274d397c1ea9cc9d6b8d9383826;hp=d0a63b1e2e4d47782839844595727d4420a9e873;hb=615301e57c25e241356a9c2380648d117709458d;hpb=26b30b0881a37665b18746d2851607c494e8ccc0 diff --git a/clusterclassic.cpp b/clusterclassic.cpp index d0a63b1..32a9341 100644 --- a/clusterclassic.cpp +++ b/clusterclassic.cpp @@ -11,15 +11,18 @@ #include "progress.hpp" /***********************************************************************/ -ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) { +ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist(1e6), nseqs(0), sim(s) { try { mapWanted = false; //set to true by mgcluster to speed up overlap merge //save so you can modify as it changes in average neighbor cutoff = c; - aboveCutoff = cutoff + 1.0; + aboveCutoff = cutoff + 10000.0; m = MothurOut::getInstance(); - globaldata = GlobalData::getInstance(); + if(method == "furthest") { tag = "fn"; } + else if (method == "average") { tag = "an"; } + else if (method == "weighted") { tag = "wn"; } + else if (method == "nearest") { tag = "nn"; } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "ClusterClassic"); @@ -37,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { ifstream fileHandle; m->openInputFile(filename, fileHandle); - fileHandle >> nseqs >> name; + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + matrixNames.push_back(name); @@ -102,14 +110,14 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { fileHandle >> distance; if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff){ + //if(distance < cutoff){ dMatrix[i][j] = distance; if (distance < smallDist) { smallDist = distance; } //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } - } + //} index++; reading->update(index); } @@ -124,9 +132,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff){ + //if(distance < cutoff){ if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); @@ -137,7 +145,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { //if (rowSmallDists[row].dist > distance) { rowSmallDists[row].dist = distance; rowSmallDists[row].col = col; rowSmallDists[row].row = row; } //if (rowSmallDists[col].dist > distance) { rowSmallDists[col].dist = distance; rowSmallDists[col].col = row; rowSmallDists[col].row = col; } - } + //} index++; reading->update(index); } @@ -162,9 +170,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff && j < i){ + if(j < i){ if (distance < smallDist) { smallDist = distance; } dMatrix[i][j] = distance; @@ -185,9 +193,9 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. - if(distance < cutoff && j < i){ + if(j < i){ if (distance < smallDist) { smallDist = distance; } int row = nameMap->get(matrixNames[i]); @@ -215,18 +223,219 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { rabund = new RAbundVector(list->getRAbundVector()); fileHandle.close(); + + return 0; } catch(exception& e) { m->errorOut(e, "ClusterClassic", "readPhylipFile"); exit(1); } +} +/***********************************************************************/ +int ClusterClassic::readPhylipFile(string filename, CountTable* countTable) { + try { + double distance; + int square; + string name; + vector matrixNames; + + ifstream fileHandle; + m->openInputFile(filename, fileHandle); + + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + + matrixNames.push_back(name); + + if(countTable == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ list = new ListVector(countTable->getListVector()); } + + + //initialize distance matrix to cutoff + dMatrix.resize(nseqs); + //rowSmallDists.resize(nseqs, temp); + for (int i = 1; i < nseqs; i++) { + dMatrix[i].resize(i, aboveCutoff); + } + + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + + if(square == 0){ + + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;icontrol_pressed) { fileHandle.close(); delete reading; return 0; } + + fileHandle >> name; + matrixNames.push_back(name); + + + //there's A LOT of repeated code throughout this method... + if(countTable == NULL){ + list->set(i, name); + + for(int j=0;jcontrol_pressed) { delete reading; fileHandle.close(); return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + //if(distance < cutoff){ + dMatrix[i][j] = distance; + if (distance < smallDist) { smallDist = distance; } + //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } + //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } + //} + index++; + reading->update(index); + } + + } + else{ + for(int j=0;j> distance; + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if (distance < smallDist) { smallDist = distance; } + + int row = countTable->get(matrixNames[i]); + int col = countTable->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + + index++; + reading->update(index); + } + } + } + } + else{ + + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + if(countTable == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(j < i){ + if (distance < smallDist) { smallDist = distance; } + + dMatrix[i][j] = distance; + } + index++; + reading->update(index); + } + + } + else{ + + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(j < i){ + if (distance < smallDist) { smallDist = distance; } + + int row = countTable->get(matrixNames[i]); + int col = countTable->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + } + index++; + reading->update(index); + } + } + } + } + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + reading->finish(); + delete reading; + + list->setLabel("0"); + rabund = new RAbundVector(); + rabund->setLabel(list->getLabel()); + + for(int i = 0; i < list->getNumBins(); i++) { + if (m->control_pressed) { break; } + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { total += countTable->getNumSeqs(binNames[j]); } + rabund->push_back(total); + } + + fileHandle.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "readPhylipFile"); + exit(1); + } + } /***********************************************************************/ //sets smallCol and smallRow, returns distance double ClusterClassic::getSmallCell() { try { - + smallDist = aboveCutoff; smallRow = 1; smallCol = 0; @@ -279,6 +488,10 @@ void ClusterClassic::clusterBins(){ rabund->set(smallRow, rabund->get(smallRow)+rabund->get(smallCol)); rabund->set(smallCol, 0); + /*for (int i = smallCol+1; i < rabund->size(); i++) { + rabund->set((i-1), rabund->get(i)); + } + rabund->resize((rabund->size()-1));*/ rabund->setLabel(toString(smallDist)); // cout << '\t' << rabund->get(smallRow) << '\t' << rabund->get(smallCol) << endl; @@ -296,6 +509,10 @@ void ClusterClassic::clusterNames(){ list->set(smallRow, list->get(smallRow)+','+list->get(smallCol)); list->set(smallCol, ""); + /*for (int i = smallCol+1; i < list->size(); i++) { + list->set((i-1), list->get(i)); + } + list->resize((list->size()-1));*/ list->setLabel(toString(smallDist)); // cout << '\t' << list->get(smallRow) << '\t' << list->get(smallCol) << endl; @@ -308,103 +525,62 @@ void ClusterClassic::clusterNames(){ /***********************************************************************/ void ClusterClassic::update(double& cutOFF){ try { -//cout << "before update " << endl; //print(); getSmallCell(); int r, c; r = smallRow; c = smallCol; - //because we only store lt, we need to make sure we grab the right location - //if (smallRow < smallCol) { c = smallRow; r = smallCol; } //smallRow is really our column value - //else { r = smallRow; c = smallCol; } //smallRow is the row value - - //reset rows smallest distance - //rowSmallDists[r].dist = aboveCutoff; rowSmallDists[r].row = 0; rowSmallDists[r].col = 0; - //rowSmallDists[c].dist = aboveCutoff; rowSmallDists[c].row = 0; rowSmallDists[c].col = 0; - - //if your rows smallest distance is from smallRow or smallCol, reset - //for(int i=0;i r) { distRow = dMatrix[i][r]; } else { distRow = dMatrix[r][i]; } - + if (i > c) { distCol = dMatrix[i][c]; dMatrix[i][c] = aboveCutoff; } //like removeCell else { distCol = dMatrix[c][i]; dMatrix[c][i] = aboveCutoff; } - + if(method == "furthest"){ newDist = max(distRow, distCol); } else if (method == "average"){ - if ((distRow == aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - }else if ((distRow == aboveCutoff) && (distCol != aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distCol) { cutOFF = distCol; } - }else if ((distRow != aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distRow) { cutOFF = distRow; } - }else { - int rowBin = rabund->get(r); - int colBin = rabund->get(c); - newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); - } + int rowBin = rabund->get(r); + int colBin = rabund->get(c); + newDist = (colBin * distCol + rowBin * distRow) / (rowBin + colBin); } else if (method == "weighted"){ - if ((distRow == aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - }else if ((distRow == aboveCutoff) && (distCol != aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distCol) { cutOFF = distCol; } - }else if ((distRow != aboveCutoff) && (distCol == aboveCutoff)) { //you are merging with a value above cutoff - newDist = aboveCutoff; //eliminate value - if (cutOFF > distRow) { cutOFF = distRow; } - }else { - newDist = (distCol + distRow) / 2.0; - } + newDist = (distCol + distRow) / 2.0; } else if (method == "nearest"){ newDist = min(distRow, distCol); } - + //cout << "newDist = " << newDist << endl; if (i > r) { dMatrix[i][r] = newDist; } else { dMatrix[r][i] = newDist; } - //if (newDist < rowSmallDists[i].dist) { rowSmallDists[i].dist = newDist; rowSmallDists[i].col = r; rowSmallDists[i].row = i; } } - //cout << "rowsmall = " << i << '\t' << rowSmallDists[i].dist << endl; } clusterBins(); clusterNames(); - - //find new small for 2 rows we just merged - //colDist temp(0,0,100.0); - //rowSmallDists[r] = temp; + + //resize each row + /*for(int i=0;ierrorOut(e, "ClusterClassic", "update"); @@ -421,16 +597,12 @@ void ClusterClassic::setMapWanted(bool f) { //parse bin string names = list->get(i); - while (names.find_first_of(',') != -1) { - //get name from bin - string name = names.substr(0,names.find_first_of(',')); + vector binnames; + m->splitAtComma(names, binnames); + for (int j = 0; j < binnames.size(); j++) { //save name and bin number - seq2Bin[name] = i; - names = names.substr(names.find_first_of(',')+1, names.length()); + seq2Bin[binnames[j]] = i; } - - //get last name - seq2Bin[names] = i; } } @@ -443,17 +615,13 @@ void ClusterClassic::setMapWanted(bool f) { void ClusterClassic::updateMap() { try { //update location of seqs in smallRow since they move to smallCol now - string names = list->get(smallRow); - while (names.find_first_of(',') != -1) { - //get name from bin - string name = names.substr(0,names.find_first_of(',')); - //save name and bin number - seq2Bin[name] = smallCol; - names = names.substr(names.find_first_of(',')+1, names.length()); - } - - //get last name - seq2Bin[names] = smallCol; + string names = list->get(smallRow); + vector binnames; + m->splitAtComma(names, binnames); + for (int j = 0; j < binnames.size(); j++) { + //save name and bin number + seq2Bin[binnames[j]] = smallCol; + } } catch(exception& e) { @@ -466,11 +634,11 @@ void ClusterClassic::print() { try { //update location of seqs in smallRow since they move to smallCol now for (int i = 0; i < dMatrix.size(); i++) { - cout << "row = " << i << '\t'; + m->mothurOut("row = " + toString(i) + "\t"); for (int j = 0; j < dMatrix[i].size(); j++) { - cout << dMatrix[i][j] << '\t'; + m->mothurOut(toString(dMatrix[i][j]) + "\t"); } - cout << endl; + m->mothurOutEndLine(); } } catch(exception& e) {