X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=hcluster.cpp;h=f8f48095b2ec55a8dce91d55881271f2f1837218;hp=2d88533757a1495d54423ad87354532a96d3e053;hb=d1c97b8c04bb75faca1e76ffad60b37a4d789d3d;hpb=c82900be3ceed3d9bc491bdc98b1819ef85c1af7 diff --git a/hcluster.cpp b/hcluster.cpp index 2d88533..f8f4809 100644 --- a/hcluster.cpp +++ b/hcluster.cpp @@ -10,13 +10,13 @@ #include "hcluster.h" #include "rabundvector.hpp" #include "listvector.hpp" -#include "sparsematrix.hpp" /***********************************************************************/ - -HCluster::HCluster(RAbundVector* rav, ListVector* lv) : rabund(rav), list(lv){ +HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) { try { + m = MothurOut::getInstance(); mapWanted = false; + exitedBreak = false; numSeqs = list->getNumSeqs(); //initialize cluster array @@ -25,9 +25,14 @@ HCluster::HCluster(RAbundVector* rav, ListVector* lv) : rabund(rav), list(lv){ clusterArray.push_back(temp); } + if ((method == "furthest") || (method == "nearest")) { + m->openInputFile(distfile, filehandle); + }else{ + processFile(); + } } catch(exception& e) { - errorOut(e, "HCluster", "HCluster"); + m->errorOut(e, "HCluster", "HCluster"); exit(1); } } @@ -44,7 +49,7 @@ void HCluster::clusterBins(){ //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl; } catch(exception& e) { - errorOut(e, "HCluster", "clusterBins"); + m->errorOut(e, "HCluster", "clusterBins"); exit(1); } @@ -66,7 +71,7 @@ void HCluster::clusterNames(){ } catch(exception& e) { - errorOut(e, "HCluster", "clusterNames"); + m->errorOut(e, "HCluster", "clusterNames"); exit(1); } @@ -74,7 +79,6 @@ void HCluster::clusterNames(){ /***********************************************************************/ int HCluster::getUpmostParent(int node){ try { - while (clusterArray[node].parent != -1) { node = clusterArray[node].parent; } @@ -82,7 +86,7 @@ int HCluster::getUpmostParent(int node){ return node; } catch(exception& e) { - errorOut(e, "HCluster", "getUpmostParent"); + m->errorOut(e, "HCluster", "getUpmostParent"); exit(1); } } @@ -91,14 +95,14 @@ void HCluster::printInfo(){ try { cout << "link table" << endl; - for (it = activeLinks.begin(); it!= activeLinks.end(); it++) { - cout << it->first << " = " << it->second << endl; + for (itActive = activeLinks.begin(); itActive!= activeLinks.end(); itActive++) { + cout << itActive->first << " = " << itActive->second << endl; } cout << endl; for (int i = 0; i < linkTable.size(); i++) { cout << i << '\t'; for (it = linkTable[i].begin(); it != linkTable[i].end(); it++) { - cout << it->first << '-' << it->second << '\t'; + cout << it->first << '-' << it->second << '\t' ; } cout << endl; } @@ -112,98 +116,98 @@ void HCluster::printInfo(){ } catch(exception& e) { - errorOut(e, "HCluster", "getUpmostParent"); + m->errorOut(e, "HCluster", "getUpmostParent"); exit(1); } } /***********************************************************************/ int HCluster::makeActive() { try { - int linkValue = 1; -//cout << "active - here" << endl; - it = activeLinks.find(smallRow); - it2 = activeLinks.find(smallCol); + + itActive = activeLinks.find(smallRow); + it2Active = activeLinks.find(smallCol); - if ((it == activeLinks.end()) && (it2 == activeLinks.end())) { //both are not active so add them + if ((itActive == activeLinks.end()) && (it2Active == activeLinks.end())) { //both are not active so add them int size = linkTable.size(); map temp; map temp2; //add link to eachother - temp[smallRow] = 1; // 1 2 - temp2[smallCol] = 1; // 1 0 1 - // 2 1 0 + temp[smallRow] = 1; // 1 2 + temp2[smallCol] = 1; // 1 0 1 + // 2 1 0 linkTable.push_back(temp); linkTable.push_back(temp2); //add to activeLinks activeLinks[smallRow] = size; activeLinks[smallCol] = size+1; -//cout << "active - here1" << endl; - }else if ((it != activeLinks.end()) && (it2 == activeLinks.end())) { //smallRow is active, smallCol is not + + }else if ((itActive != activeLinks.end()) && (it2Active == activeLinks.end())) { //smallRow is active, smallCol is not int size = linkTable.size(); - int alreadyActiveRow = it->second; + int alreadyActiveRow = itActive->second; map temp; //add link to eachother - temp[smallRow] = 1; // 6 2 3 5 - linkTable.push_back(temp); // 6 0 1 2 0 - linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1 - // 3 2 1 0 0 - // 5 0 1 0 0 + temp[smallRow] = 1; // 6 2 3 5 + linkTable.push_back(temp); // 6 0 1 2 0 + linkTable[alreadyActiveRow][smallCol] = 1; // 2 1 0 1 1 + // 3 2 1 0 0 + // 5 0 1 0 0 //add to activeLinks activeLinks[smallCol] = size; -//cout << "active - here2" << endl; - }else if ((it == activeLinks.end()) && (it2 != activeLinks.end())) { //smallCol is active, smallRow is not + + }else if ((itActive == activeLinks.end()) && (it2Active != activeLinks.end())) { //smallCol is active, smallRow is not int size = linkTable.size(); - int alreadyActiveCol = it2->second; + int alreadyActiveCol = it2Active->second; map temp; //add link to eachother - temp[smallCol] = 1; // 6 2 3 5 - linkTable.push_back(temp); // 6 0 1 2 0 - linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1 - // 3 2 1 0 0 - // 5 0 1 0 0 + temp[smallCol] = 1; // 6 2 3 5 + linkTable.push_back(temp); // 6 0 1 2 0 + linkTable[alreadyActiveCol][smallRow] = 1; // 2 1 0 1 1 + // 3 2 1 0 0 + // 5 0 1 0 0 //add to activeLinks activeLinks[smallRow] = size; -//cout << "active - here3" << endl; + }else { //both are active so add one - int row = it->second; - int col = it2->second; -//cout << row << '\t' << col << endl; + int row = itActive->second; + int col = it2Active->second; + linkTable[row][smallCol]++; linkTable[col][smallRow]++; linkValue = linkTable[row][smallCol]; -//cout << "active - here4" << endl; } return linkValue; } catch(exception& e) { - errorOut(e, "HCluster", "makeActive"); + m->errorOut(e, "HCluster", "makeActive"); exit(1); } } /***********************************************************************/ void HCluster::updateArrayandLinkTable() { try { - //if cluster was made update clusterArray and linkTable - int size = clusterArray.size(); - - //add new node - clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild); - clusterArray.push_back(temp); - - //update child nodes - clusterArray[smallRow].parent = size; - clusterArray[smallCol].parent = size; + //if cluster was made update clusterArray and linkTable + int size = clusterArray.size(); + + //add new node + clusterNode temp(clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq, -1, clusterArray[smallCol].smallChild); + clusterArray.push_back(temp); + + //update child nodes + clusterArray[smallRow].parent = size; + clusterArray[smallCol].parent = size; + + if (method == "furthest") { //update linkTable by merging clustered rows and columns int rowSpot = activeLinks[smallRow]; int colSpot = activeLinks[smallCol]; - //cout << "here" << endl; + //fix old rows for (int i = 0; i < linkTable.size(); i++) { //check if they are in map @@ -222,8 +226,7 @@ void HCluster::updateArrayandLinkTable() { linkTable[i].erase(smallRow); //delete col } } - //printInfo(); -//cout << "here2" << endl; + //merge their values for (it = linkTable[rowSpot].begin(); it != linkTable[rowSpot].end(); it++) { it2 = linkTable[colSpot].find(it->first); //does the col also have this @@ -231,70 +234,79 @@ void HCluster::updateArrayandLinkTable() { if (it2 == linkTable[colSpot].end()) { //not there so add it linkTable[colSpot][it->first] = it->second; }else { //merge them - linkTable[colSpot][it->first] = it->second+it2->second; + linkTable[colSpot][it->first] = it->second + it2->second; } } -//cout << "here3" << endl; + linkTable[colSpot].erase(size); linkTable.erase(linkTable.begin()+rowSpot); //delete row - //printInfo(); + //update activerows activeLinks.erase(smallRow); activeLinks.erase(smallCol); activeLinks[size] = colSpot; //adjust everybody elses spot since you deleted - time vs. space - for (it = activeLinks.begin(); it != activeLinks.end(); it++) { - if (it->second > rowSpot) { activeLinks[it->first]--; } + for (itActive = activeLinks.begin(); itActive != activeLinks.end(); itActive++) { + if (itActive->second > rowSpot) { activeLinks[itActive->first]--; } } - -//cout << "here4" << endl; - + } } catch(exception& e) { - errorOut(e, "HCluster", "updateArrayandLinkTable"); + m->errorOut(e, "HCluster", "updateArrayandLinkTable"); exit(1); } } /***********************************************************************/ -void HCluster::update(int row, int col, float distance){ +double HCluster::update(int row, int col, float distance){ try { - + bool cluster = false; smallRow = row; smallCol = col; smallDist = distance; - bool clustered = false; //find upmost parent of row and col smallRow = getUpmostParent(smallRow); smallCol = getUpmostParent(smallCol); - //cout << "row = " << row << " smallRow = " << smallRow << " col = " << col << " smallCol = " << smallCol << " dist = " << distance << endl; - - //are they active in the link table - int linkValue = makeActive(); //after this point this nodes info is active in linkTable - //printInfo(); -//cout << "linkValue = " << linkValue << " times = " << (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq) << endl; - //can we cluster??? - if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { - //printInfo(); - updateArrayandLinkTable(); - clusterBins(); - clusterNames(); - clustered = true; - //printInfo(); + + //you don't want to cluster with yourself + if (smallRow != smallCol) { + + if ((method == "furthest") || (method == "nearest")) { + //can we cluster??? + if (method == "nearest") { cluster = true; } + else{ //assume furthest + //are they active in the link table + int linkValue = makeActive(); //after this point this nodes info is active in linkTable + if (linkValue == (clusterArray[smallRow].numSeq * clusterArray[smallCol].numSeq)) { cluster = true; } + } + + if (cluster) { + updateArrayandLinkTable(); + clusterBins(); + clusterNames(); + } + }else { + cluster = true; + updateArrayandLinkTable(); + clusterBins(); + clusterNames(); + combineFile(); + } } + return cutoff; //printInfo(); } catch(exception& e) { - errorOut(e, "HCluster", "update"); + m->errorOut(e, "HCluster", "update"); exit(1); } } /***********************************************************************/ -void HCluster::setMapWanted(bool m) { +void HCluster::setMapWanted(bool ms) { try { - mapWanted = m; + mapWanted = ms; //initialize map for (int i = 0; i < list->getNumBins(); i++) { @@ -315,7 +327,7 @@ void HCluster::setMapWanted(bool m) { } catch(exception& e) { - errorOut(e, "HCluster", "setMapWanted"); + m->errorOut(e, "HCluster", "setMapWanted"); exit(1); } } @@ -336,7 +348,451 @@ try { seq2Bin[names] = clusterArray[smallCol].smallChild; } catch(exception& e) { - errorOut(e, "HCluster", "updateMap"); + m->errorOut(e, "HCluster", "updateMap"); + exit(1); + } +} +//********************************************************************************************************************** +vector HCluster::getSeqs(){ + try { + vector sameSeqs; + + if ((method == "furthest") || (method == "nearest")) { + sameSeqs = getSeqsFNNN(); + }else{ + sameSeqs = getSeqsAN(); + } + + return sameSeqs; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "getSeqs"); + exit(1); + } +} +//********************************************************************************************************************** +vector HCluster::getSeqsFNNN(){ + try { + string firstName, secondName; + float distance, prevDistance; + vector sameSeqs; + prevDistance = -1; + + //if you are not at the beginning of the file + if (exitedBreak) { + sameSeqs.push_back(next); + prevDistance = next.dist; + exitedBreak = false; + } + + //get entry + while (!filehandle.eof()) { + + filehandle >> firstName >> secondName >> distance; m->gobble(filehandle); + + //save first one + if (prevDistance == -1) { prevDistance = distance; } + + map::iterator itA = nameMap->find(firstName); + map::iterator itB = nameMap->find(secondName); + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } + + //using cutoff + if (distance > cutoff) { break; } + + if (distance != -1) { //-1 means skip me + + //are the distances the same + if (distance == prevDistance) { //save in vector + seqDist temp(itA->second, itB->second, distance); + sameSeqs.push_back(temp); + exitedBreak = false; + }else{ + next.seq1 = itA->second; + next.seq2 = itB->second; + next.dist = distance; + exitedBreak = true; + break; + } + } + } + + //rndomize matching dists + random_shuffle(sameSeqs.begin(), sameSeqs.end()); + + return sameSeqs; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "getSeqsFNNN"); + exit(1); + } +} +//********************************************************************************************************************** +vector HCluster::getSeqsAN(){ + try { + int firstName, secondName; + float prevDistance; + vector sameSeqs; + prevDistance = -1; + + m->openInputFile(distfile, filehandle, "no error"); + + //is the smallest value in mergedMin or the distfile? + float mergedMinDist = 10000; + float distance = 10000; + if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist; } + + if (!filehandle.eof()) { + filehandle >> firstName >> secondName >> distance; m->gobble(filehandle); + //save first one + if (prevDistance == -1) { prevDistance = distance; } + if (distance != -1) { //-1 means skip me + seqDist temp(firstName, secondName, distance); + sameSeqs.push_back(temp); + }else{ distance = 10000; } + } + + if (mergedMinDist < distance) { //get minimum distance from mergedMin + //remove distance we saved from file + sameSeqs.clear(); + prevDistance = mergedMinDist; + + for (int i = 0; i < mergedMin.size(); i++) { + if (mergedMin[i].dist == prevDistance) { + sameSeqs.push_back(mergedMin[i]); + }else { break; } + } + }else{ //get minimum from file + //get entry + while (!filehandle.eof()) { + + filehandle >> firstName >> secondName >> distance; m->gobble(filehandle); + + if (prevDistance == -1) { prevDistance = distance; } + + if (distance != -1) { //-1 means skip me + //are the distances the same + if (distance == prevDistance) { //save in vector + seqDist temp(firstName, secondName, distance); + sameSeqs.push_back(temp); + }else{ + break; + } + } + } + } + filehandle.close(); + + //randomize matching dists + random_shuffle(sameSeqs.begin(), sameSeqs.end()); + + //can only return one value since once these are merged the other distances in sameSeqs may have changed + vector temp; + if (sameSeqs.size() > 0) { temp.push_back(sameSeqs[0]); } + + return temp; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "getSeqsAN"); + exit(1); + } +} + +/***********************************************************************/ +int HCluster::combineFile() { + try { + //int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware + //char* inputBuffer; + //inputBuffer = new char[bufferSize]; + //size_t numRead; + + string tempDistFile = distfile + ".temp"; + ofstream out; + m->openOutputFile(tempDistFile, out); + + //FILE* in; + //in = fopen(distfile.c_str(), "rb"); + + ifstream in; + m->openInputFile(distfile, in, "no error"); + + int first, second; + float dist; + + vector< map > smallRowColValues; + smallRowColValues.resize(2); //0 = row, 1 = col + int count = 0; + + //go through file pulling out distances related to rows merging + //if mergedMin contains distances add those back into file + //bool done = false; + //partialDist = ""; + //while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) { +//cout << "number of char read = " << numRead << endl; +//cout << inputBuffer << endl; + //if (numRead < bufferSize) { done = true; } + + //parse input into individual distances + //int spot = 0; + //string outputString = ""; + //while(spot < numRead) { + //cout << "spot = " << spot << endl; + // seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize); + + //you read a partial distance + // if (nextDist.seq1 == -1) { break; } + while (!in.eof()) { + //first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist; + //cout << "next distance = " << first << '\t' << second << '\t' << dist << endl; + //since file is sorted and mergedMin is sorted + //you can put the smallest distance from each through the code below and keep the file sorted + + in >> first >> second >> dist; m->gobble(in); + + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempDistFile); return 0; } + + //while there are still values in mergedMin that are smaller than the distance read from file + while (count < mergedMin.size()) { + + //is the distance in mergedMin smaller than from the file + if (mergedMin[count].dist < dist) { + //is this a distance related to the columns merging? + //if yes, save in memory + if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time + }else if (mergedMin[count].seq1 == smallCol) { + smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist; + }else if (mergedMin[count].seq2 == smallCol) { + smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist; + }else if (mergedMin[count].seq1 == smallRow) { + smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist; + }else if (mergedMin[count].seq2 == smallRow) { + smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist; + }else { //if no, write to temp file + //outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n'; + //if (mergedMin[count].dist < cutoff) { + out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl; + //} + } + count++; + }else{ break; } + } + + //is this a distance related to the columns merging? + //if yes, save in memory + if ((first == smallRow) && (second == smallCol)) { //do nothing this is the smallest distance from last time + }else if (first == smallCol) { + smallRowColValues[1][second] = dist; + }else if (second == smallCol) { + smallRowColValues[1][first] = dist; + }else if (first == smallRow) { + smallRowColValues[0][second] = dist; + }else if (second == smallRow) { + smallRowColValues[0][first] = dist; + + }else { //if no, write to temp file + //outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n'; + //if (dist < cutoff) { + out << first << '\t' << second << '\t' << dist << endl; + //} + } + } + + //out << outputString; + //if(done) { break; } + //} + //fclose(in); + in.close(); + + //if values in mergedMin are larger than the the largest in file then + while (count < mergedMin.size()) { + //is this a distance related to the columns merging? + //if yes, save in memory + if ((mergedMin[count].seq1 == smallRow) && (mergedMin[count].seq2 == smallCol)) { //do nothing this is the smallest distance from last time + }else if (mergedMin[count].seq1 == smallCol) { + smallRowColValues[1][mergedMin[count].seq2] = mergedMin[count].dist; + }else if (mergedMin[count].seq2 == smallCol) { + smallRowColValues[1][mergedMin[count].seq1] = mergedMin[count].dist; + }else if (mergedMin[count].seq1 == smallRow) { + smallRowColValues[0][mergedMin[count].seq2] = mergedMin[count].dist; + }else if (mergedMin[count].seq2 == smallRow) { + smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist; + + }else { //if no, write to temp file + //if (mergedMin[count].dist < cutoff) { + out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl; + //} + } + count++; + } + out.close(); + mergedMin.clear(); + + //rename tempfile to distfile + m->mothurRemove(distfile); + rename(tempDistFile.c_str(), distfile.c_str()); +//cout << "remove = "<< renameOK << " rename = " << ok << endl; + + //merge clustered rows averaging the distances + map::iterator itMerge; + map::iterator it2Merge; + for(itMerge = smallRowColValues[0].begin(); itMerge != smallRowColValues[0].end(); itMerge++) { + //does smallRowColValues[1] have a distance to this seq too? + it2Merge = smallRowColValues[1].find(itMerge->first); + + float average; + if (it2Merge != smallRowColValues[1].end()) { //if yes, then average + //average + if (method == "average") { + int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq; + average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total; + }else { //weighted + average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0; + } + + smallRowColValues[1].erase(it2Merge); + + seqDist temp(clusterArray[smallRow].parent, itMerge->first, average); + mergedMin.push_back(temp); + }else { + //can't find value so update cutoff + if (cutoff > itMerge->second) { cutoff = itMerge->second; } + } + } + + //update cutoff + for(itMerge = smallRowColValues[1].begin(); itMerge != smallRowColValues[1].end(); itMerge++) { + if (cutoff > itMerge->second) { cutoff = itMerge->second; } + } + + //sort merged values + sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "combineFile"); + exit(1); + } +} +/*********************************************************************** +seqDist HCluster::getNextDist(char* buffer, int& index, int size){ + try { + seqDist next; + int indexBefore = index; + string first, second, distance; + first = ""; second = ""; distance = ""; + int tabCount = 0; +//cout << "partial = " << partialDist << endl; + if (partialDist != "") { //read what you can, you know it is less than a whole distance. + for (int i = 0; i < partialDist.size(); i++) { + if (tabCount == 0) { + if (partialDist[i] == '\t') { tabCount++; } + else { first += partialDist[i]; } + }else if (tabCount == 1) { + if (partialDist[i] == '\t') { tabCount++; } + else { second += partialDist[i]; } + }else if (tabCount == 2) { + distance += partialDist[i]; + } + } + partialDist = ""; + } + + //try to get another distance + bool gotDist = false; + while (index < size) { + if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows + gotDist = true; + + //m->gobble space + while (index < size) { + if (isspace(buffer[index])) { index++; } + else { break; } + } + break; + }else{ + if (tabCount == 0) { + if (buffer[index] == '\t') { tabCount++; } + else { first += buffer[index]; } + }else if (tabCount == 1) { + if (buffer[index] == '\t') { tabCount++; } + else { second += buffer[index]; } + }else if (tabCount == 2) { + distance += buffer[index]; + } + index++; + } + } + + //there was not a whole distance in the buffer, ie. buffer = "1 2 0.01 2 3 0." + //then you want to save the partial distance. + if (!gotDist) { + for (int i = indexBefore; i < size; i++) { + partialDist += buffer[i]; + } + index = size + 1; + next.seq1 = -1; next.seq2 = -1; next.dist = 0.0; + }else{ + int firstname, secondname; + float dist; + + convert(first, firstname); + convert(second, secondname); + convert(distance, dist); + + next.seq1 = firstname; next.seq2 = secondname; next.dist = dist; + } + + return next; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "getNextDist"); + exit(1); + } +} +***********************************************************************/ +int HCluster::processFile() { + try { + string firstName, secondName; + float distance; + + ifstream in; + m->openInputFile(distfile, in, "no error"); + + ofstream out; + string outTemp = distfile + ".temp"; + m->openOutputFile(outTemp, out); + + //get entry + while (!in.eof()) { + if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outTemp); return 0; } + + in >> firstName >> secondName >> distance; m->gobble(in); + + map::iterator itA = nameMap->find(firstName); + map::iterator itB = nameMap->find(secondName); + if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); } + if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); } + + //using cutoff + if (distance > cutoff) { break; } + + if (distance != -1) { //-1 means skip me + out << itA->second << '\t' << itB->second << '\t' << distance << endl; + } + } + + in.close(); + out.close(); + + m->mothurRemove(distfile); + rename(outTemp.c_str(), distfile.c_str()); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "HCluster", "processFile"); exit(1); } } @@ -344,3 +800,8 @@ try { + + + + +