X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=clusterclassic.cpp;h=32a9341613d07274d397c1ea9cc9d6b8d9383826;hp=287332ad628e33a6a460aee903ba8f7955027c85;hb=b206f634aae1b4ce13978d203247fb64757d5482;hpb=173a1a82a4e4aa6bef8e4ba77bce342fb9c6563e diff --git a/clusterclassic.cpp b/clusterclassic.cpp index 287332a..32a9341 100644 --- a/clusterclassic.cpp +++ b/clusterclassic.cpp @@ -11,7 +11,7 @@ #include "progress.hpp" /***********************************************************************/ -ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), nseqs(0) { +ClusterClassic::ClusterClassic(float c, string f, bool s) : method(f), smallDist(1e6), nseqs(0), sim(s) { try { mapWanted = false; //set to true by mgcluster to speed up overlap merge @@ -19,7 +19,10 @@ ClusterClassic::ClusterClassic(float c, string f) : method(f), smallDist(1e6), n cutoff = c; aboveCutoff = cutoff + 10000.0; m = MothurOut::getInstance(); - globaldata = GlobalData::getInstance(); + if(method == "furthest") { tag = "fn"; } + else if (method == "average") { tag = "an"; } + else if (method == "weighted") { tag = "wn"; } + else if (method == "nearest") { tag = "nn"; } } catch(exception& e) { m->errorOut(e, "ClusterClassic", "ClusterClassic"); @@ -37,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { ifstream fileHandle; m->openInputFile(filename, fileHandle); - fileHandle >> nseqs >> name; + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + matrixNames.push_back(name); @@ -102,7 +110,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { fileHandle >> distance; if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. //if(distance < cutoff){ dMatrix[i][j] = distance; @@ -124,7 +132,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. //if(distance < cutoff){ if (distance < smallDist) { smallDist = distance; } @@ -162,7 +170,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } @@ -185,7 +193,7 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } if (distance == -1) { distance = 1000000; } - else if (globaldata->sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. if(j < i){ if (distance < smallDist) { smallDist = distance; } @@ -215,12 +223,213 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) { rabund = new RAbundVector(list->getRAbundVector()); fileHandle.close(); + + return 0; } catch(exception& e) { m->errorOut(e, "ClusterClassic", "readPhylipFile"); exit(1); } +} +/***********************************************************************/ +int ClusterClassic::readPhylipFile(string filename, CountTable* countTable) { + try { + double distance; + int square; + string name; + vector matrixNames; + + ifstream fileHandle; + m->openInputFile(filename, fileHandle); + + string numTest; + fileHandle >> numTest >> name; + + if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); } + else { convert(numTest, nseqs); } + + + matrixNames.push_back(name); + + if(countTable == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ list = new ListVector(countTable->getListVector()); } + + + //initialize distance matrix to cutoff + dMatrix.resize(nseqs); + //rowSmallDists.resize(nseqs, temp); + for (int i = 1; i < nseqs; i++) { + dMatrix[i].resize(i, aboveCutoff); + } + + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + + if(square == 0){ + + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;icontrol_pressed) { fileHandle.close(); delete reading; return 0; } + + fileHandle >> name; + matrixNames.push_back(name); + + + //there's A LOT of repeated code throughout this method... + if(countTable == NULL){ + list->set(i, name); + + for(int j=0;jcontrol_pressed) { delete reading; fileHandle.close(); return 0; } + + fileHandle >> distance; + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + //if(distance < cutoff){ + dMatrix[i][j] = distance; + if (distance < smallDist) { smallDist = distance; } + //if (rowSmallDists[i].dist > distance) { rowSmallDists[i].dist = distance; rowSmallDists[i].col = j; rowSmallDists[i].row = i; } + //if (rowSmallDists[j].dist > distance) { rowSmallDists[j].dist = distance; rowSmallDists[j].col = i; rowSmallDists[j].row = j; } + //} + index++; + reading->update(index); + } + + } + else{ + for(int j=0;j> distance; + + if (m->control_pressed) { delete reading; fileHandle.close(); return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if (distance < smallDist) { smallDist = distance; } + + int row = countTable->get(matrixNames[i]); + int col = countTable->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + + index++; + reading->update(index); + } + } + } + } + else{ + + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + if(countTable == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(j < i){ + if (distance < smallDist) { smallDist = distance; } + + dMatrix[i][j] = distance; + } + index++; + reading->update(index); + } + + } + else{ + + for(int j=0;j> distance; + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + if (distance == -1) { distance = 1000000; } + else if (sim) { distance = 1.0 - distance; } //user has entered a sim matrix that we need to convert. + + if(j < i){ + if (distance < smallDist) { smallDist = distance; } + + int row = countTable->get(matrixNames[i]); + int col = countTable->get(matrixNames[j]); + + if (row < col) { dMatrix[col][row] = distance; } + else { dMatrix[row][col] = distance; } + } + index++; + reading->update(index); + } + } + } + } + + if (m->control_pressed) { fileHandle.close(); delete reading; return 0; } + + reading->finish(); + delete reading; + + list->setLabel("0"); + rabund = new RAbundVector(); + rabund->setLabel(list->getLabel()); + + for(int i = 0; i < list->getNumBins(); i++) { + if (m->control_pressed) { break; } + vector binNames; + string bin = list->get(i); + m->splitAtComma(bin, binNames); + int total = 0; + for (int j = 0; j < binNames.size(); j++) { total += countTable->getNumSeqs(binNames[j]); } + rabund->push_back(total); + } + + fileHandle.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "ClusterClassic", "readPhylipFile"); + exit(1); + } + } /***********************************************************************/ //sets smallCol and smallRow, returns distance @@ -388,16 +597,12 @@ void ClusterClassic::setMapWanted(bool f) { //parse bin string names = list->get(i); - while (names.find_first_of(',') != -1) { - //get name from bin - string name = names.substr(0,names.find_first_of(',')); + vector binnames; + m->splitAtComma(names, binnames); + for (int j = 0; j < binnames.size(); j++) { //save name and bin number - seq2Bin[name] = i; - names = names.substr(names.find_first_of(',')+1, names.length()); + seq2Bin[binnames[j]] = i; } - - //get last name - seq2Bin[names] = i; } } @@ -410,17 +615,13 @@ void ClusterClassic::setMapWanted(bool f) { void ClusterClassic::updateMap() { try { //update location of seqs in smallRow since they move to smallCol now - string names = list->get(smallRow); - while (names.find_first_of(',') != -1) { - //get name from bin - string name = names.substr(0,names.find_first_of(',')); - //save name and bin number - seq2Bin[name] = smallCol; - names = names.substr(names.find_first_of(',')+1, names.length()); - } - - //get last name - seq2Bin[names] = smallCol; + string names = list->get(smallRow); + vector binnames; + m->splitAtComma(names, binnames); + for (int j = 0; j < binnames.size(); j++) { + //save name and bin number + seq2Bin[binnames[j]] = smallCol; + } } catch(exception& e) { @@ -433,11 +634,11 @@ void ClusterClassic::print() { try { //update location of seqs in smallRow since they move to smallCol now for (int i = 0; i < dMatrix.size(); i++) { - cout << "row = " << i << '\t'; + m->mothurOut("row = " + toString(i) + "\t"); for (int j = 0; j < dMatrix[i].size(); j++) { - cout << dMatrix[i][j] << '\t'; + m->mothurOut(toString(dMatrix[i][j]) + "\t"); } - cout << endl; + m->mothurOutEndLine(); } } catch(exception& e) {