X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=hcluster.cpp;h=07deaa5991c94627fa2e02a88e3b10b2729e290e;hb=cb9c5129766d46fa1a266f724c1ebd41047f3a03;hp=259036f0f02a42beb95926f9ce59a4140164840e;hpb=832d53a9dfac6b1795735eec643d8cf627b0d8e3;p=mothur.git diff --git a/hcluster.cpp b/hcluster.cpp index 259036f..07deaa5 100644 --- a/hcluster.cpp +++ b/hcluster.cpp @@ -13,8 +13,9 @@ #include "sparsematrix.hpp" /***********************************************************************/ -HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(m), distfile(d), nameMap(n), cutoff(c) { +HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) { try { + m = MothurOut::getInstance(); mapWanted = false; exitedBreak = false; numSeqs = list->getNumSeqs(); @@ -32,7 +33,7 @@ HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m, string d, NameAs } } catch(exception& e) { - errorOut(e, "HCluster", "HCluster"); + m->errorOut(e, "HCluster", "HCluster"); exit(1); } } @@ -49,7 +50,7 @@ void HCluster::clusterBins(){ //cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl; } catch(exception& e) { - errorOut(e, "HCluster", "clusterBins"); + m->errorOut(e, "HCluster", "clusterBins"); exit(1); } @@ -71,7 +72,7 @@ void HCluster::clusterNames(){ } catch(exception& e) { - errorOut(e, "HCluster", "clusterNames"); + m->errorOut(e, "HCluster", "clusterNames"); exit(1); } @@ -86,7 +87,7 @@ int HCluster::getUpmostParent(int node){ return node; } catch(exception& e) { - errorOut(e, "HCluster", "getUpmostParent"); + m->errorOut(e, "HCluster", "getUpmostParent"); exit(1); } } @@ -116,7 +117,7 @@ void HCluster::printInfo(){ } catch(exception& e) { - errorOut(e, "HCluster", "getUpmostParent"); + m->errorOut(e, "HCluster", "getUpmostParent"); exit(1); } } @@ -184,7 +185,7 @@ int HCluster::makeActive() { return linkValue; } catch(exception& e) { - errorOut(e, "HCluster", "makeActive"); + m->errorOut(e, "HCluster", "makeActive"); exit(1); } } @@ -253,7 +254,7 @@ void HCluster::updateArrayandLinkTable() { } } catch(exception& e) { - errorOut(e, "HCluster", "updateArrayandLinkTable"); + m->errorOut(e, "HCluster", "updateArrayandLinkTable"); exit(1); } } @@ -299,14 +300,14 @@ bool HCluster::update(int row, int col, float distance){ //printInfo(); } catch(exception& e) { - errorOut(e, "HCluster", "update"); + m->errorOut(e, "HCluster", "update"); exit(1); } } /***********************************************************************/ -void HCluster::setMapWanted(bool m) { +void HCluster::setMapWanted(bool ms) { try { - mapWanted = m; + mapWanted = ms; //initialize map for (int i = 0; i < list->getNumBins(); i++) { @@ -327,7 +328,7 @@ void HCluster::setMapWanted(bool m) { } catch(exception& e) { - errorOut(e, "HCluster", "setMapWanted"); + m->errorOut(e, "HCluster", "setMapWanted"); exit(1); } } @@ -348,7 +349,7 @@ try { seq2Bin[names] = clusterArray[smallCol].smallChild; } catch(exception& e) { - errorOut(e, "HCluster", "updateMap"); + m->errorOut(e, "HCluster", "updateMap"); exit(1); } } @@ -366,7 +367,7 @@ vector HCluster::getSeqs(){ return sameSeqs; } catch(exception& e) { - errorOut(e, "HCluster", "getSeqs"); + m->errorOut(e, "HCluster", "getSeqs"); exit(1); } } @@ -424,7 +425,7 @@ vector HCluster::getSeqsFNNN(){ return sameSeqs; } catch(exception& e) { - errorOut(e, "HCluster", "getSeqsFNNN"); + m->errorOut(e, "HCluster", "getSeqsFNNN"); exit(1); } } @@ -495,26 +496,29 @@ vector HCluster::getSeqsAN(){ return temp; } catch(exception& e) { - errorOut(e, "HCluster", "getSeqsAN"); + m->errorOut(e, "HCluster", "getSeqsAN"); exit(1); } } /***********************************************************************/ -void HCluster::combineFile() { +int HCluster::combineFile() { try { - int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware - char* inputBuffer; - inputBuffer = new char[bufferSize]; - size_t numRead; + //int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware + //char* inputBuffer; + //inputBuffer = new char[bufferSize]; + //size_t numRead; string tempDistFile = distfile + ".temp"; ofstream out; openOutputFile(tempDistFile, out); - FILE* in; - in = fopen(distfile.c_str(), "rb"); + //FILE* in; + //in = fopen(distfile.c_str(), "rb"); + ifstream in; + openInputFile(distfile, in); + int first, second; float dist; @@ -524,28 +528,32 @@ void HCluster::combineFile() { //go through file pulling out distances related to rows merging //if mergedMin contains distances add those back into file - bool done = false; - partialDist = ""; - while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) { + //bool done = false; + //partialDist = ""; + //while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) { //cout << "number of char read = " << numRead << endl; //cout << inputBuffer << endl; - if (numRead < bufferSize) { done = true; } + //if (numRead < bufferSize) { done = true; } //parse input into individual distances - int spot = 0; - string outputString = ""; - while(spot < numRead) { + //int spot = 0; + //string outputString = ""; + //while(spot < numRead) { //cout << "spot = " << spot << endl; - seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize); + // seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize); //you read a partial distance - if (nextDist.seq1 == -1) { break; } - - first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist; + // if (nextDist.seq1 == -1) { break; } + while (!in.eof()) { + //first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist; //cout << "next distance = " << first << '\t' << second << '\t' << dist << endl; //since file is sorted and mergedMin is sorted //you can put the smallest distance from each through the code below and keep the file sorted + in >> first >> second >> dist; gobble(in); + + if (m->control_pressed) { in.close(); out.close(); remove(tempDistFile.c_str()); return 0; } + //while there are still values in mergedMin that are smaller than the distance read from file while (count < mergedMin.size()) { @@ -563,7 +571,8 @@ void HCluster::combineFile() { }else if (mergedMin[count].seq2 == smallRow) { smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist; }else { //if no, write to temp file - outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n'; + //outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n'; + out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl; } count++; }else{ break; } @@ -582,14 +591,16 @@ void HCluster::combineFile() { smallRowColValues[0][first] = dist; }else { //if no, write to temp file - outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n'; + //outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n'; + out << first << '\t' << second << '\t' << dist << endl; } } - out << outputString; - if(done) { break; } - } - fclose(in); + //out << outputString; + //if(done) { break; } + //} + //fclose(in); + in.close(); //if values in mergedMin are larger than the the largest in file then while (count < mergedMin.size()) { @@ -616,7 +627,8 @@ void HCluster::combineFile() { //rename tempfile to distfile remove(distfile.c_str()); rename(tempDistFile.c_str(), distfile.c_str()); - +//cout << "remove = "<< renameOK << " rename = " << ok << endl; + //merge clustered rows averaging the distances map::iterator itMerge; map::iterator it2Merge; @@ -638,13 +650,15 @@ void HCluster::combineFile() { //sort merged values sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance); + + return 0; } catch(exception& e) { - errorOut(e, "HCluster", "combineFile"); + m->errorOut(e, "HCluster", "combineFile"); exit(1); } } -/***********************************************************************/ +/*********************************************************************** seqDist HCluster::getNextDist(char* buffer, int& index, int size){ try { seqDist next; @@ -716,12 +730,12 @@ seqDist HCluster::getNextDist(char* buffer, int& index, int size){ return next; } catch(exception& e) { - errorOut(e, "HCluster", "getNextDist"); + m->errorOut(e, "HCluster", "getNextDist"); exit(1); } } /***********************************************************************/ -void HCluster::processFile() { +int HCluster::processFile() { try { string firstName, secondName; float distance; @@ -735,6 +749,7 @@ void HCluster::processFile() { //get entry while (!in.eof()) { + if (m->control_pressed) { in.close(); out.close(); remove(outTemp.c_str()); return 0; } in >> firstName >> secondName >> distance; gobble(in); @@ -756,9 +771,11 @@ void HCluster::processFile() { remove(distfile.c_str()); rename(outTemp.c_str(), distfile.c_str()); + + return 0; } catch(exception& e) { - errorOut(e, "HCluster", "processFile"); + m->errorOut(e, "HCluster", "processFile"); exit(1); } }