#include "sparsematrix.hpp"
/***********************************************************************/
-HCluster::HCluster(RAbundVector* rav, ListVector* lv, string m, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(m), distfile(d), nameMap(n), cutoff(c) {
+HCluster::HCluster(RAbundVector* rav, ListVector* lv, string ms, string d, NameAssignment* n, float c) : rabund(rav), list(lv), method(ms), distfile(d), nameMap(n), cutoff(c) {
try {
+ m = MothurOut::getInstance();
mapWanted = false;
exitedBreak = false;
numSeqs = list->getNumSeqs();
clusterArray.push_back(temp);
}
- if (method != "average") {
- openInputFile(distfile, filehandle);
- }else{ firstRead = true; }
+ if ((method == "furthest") || (method == "nearest")) {
+ m->openInputFile(distfile, filehandle);
+ }else{
+ processFile();
+ }
}
catch(exception& e) {
- errorOut(e, "HCluster", "HCluster");
+ m->errorOut(e, "HCluster", "HCluster");
exit(1);
}
}
//cout << '\t' << rabund->get(clusterArray[smallRow].smallChild) << '\t' << rabund->get(clusterArray[smallCol].smallChild) << endl;
}
catch(exception& e) {
- errorOut(e, "HCluster", "clusterBins");
+ m->errorOut(e, "HCluster", "clusterBins");
exit(1);
}
}
catch(exception& e) {
- errorOut(e, "HCluster", "clusterNames");
+ m->errorOut(e, "HCluster", "clusterNames");
exit(1);
}
return node;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getUpmostParent");
+ m->errorOut(e, "HCluster", "getUpmostParent");
exit(1);
}
}
}
catch(exception& e) {
- errorOut(e, "HCluster", "getUpmostParent");
+ m->errorOut(e, "HCluster", "getUpmostParent");
exit(1);
}
}
return linkValue;
}
catch(exception& e) {
- errorOut(e, "HCluster", "makeActive");
+ m->errorOut(e, "HCluster", "makeActive");
exit(1);
}
}
}
}
catch(exception& e) {
- errorOut(e, "HCluster", "updateArrayandLinkTable");
+ m->errorOut(e, "HCluster", "updateArrayandLinkTable");
exit(1);
}
}
//you don't want to cluster with yourself
if (smallRow != smallCol) {
- if (method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
//can we cluster???
if (method == "nearest") { cluster = true; }
else{ //assume furthest
//printInfo();
}
catch(exception& e) {
- errorOut(e, "HCluster", "update");
+ m->errorOut(e, "HCluster", "update");
exit(1);
}
}
/***********************************************************************/
-void HCluster::setMapWanted(bool m) {
+void HCluster::setMapWanted(bool ms) {
try {
- mapWanted = m;
+ mapWanted = ms;
//initialize map
for (int i = 0; i < list->getNumBins(); i++) {
}
catch(exception& e) {
- errorOut(e, "HCluster", "setMapWanted");
+ m->errorOut(e, "HCluster", "setMapWanted");
exit(1);
}
}
seq2Bin[names] = clusterArray[smallCol].smallChild;
}
catch(exception& e) {
- errorOut(e, "HCluster", "updateMap");
+ m->errorOut(e, "HCluster", "updateMap");
exit(1);
}
}
try {
vector<seqDist> sameSeqs;
- if(method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
sameSeqs = getSeqsFNNN();
}else{
- if (firstRead) { processFile(); }
sameSeqs = getSeqsAN();
}
return sameSeqs;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getSeqs");
+ m->errorOut(e, "HCluster", "getSeqs");
exit(1);
}
}
//get entry
while (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
//save first one
if (prevDistance == -1) { prevDistance = distance; }
return sameSeqs;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getSeqsFNNN");
+ m->errorOut(e, "HCluster", "getSeqsFNNN");
exit(1);
}
}
vector<seqDist> sameSeqs;
prevDistance = -1;
- openInputFile(distfile, filehandle, "no error");
+ m->openInputFile(distfile, filehandle, "no error");
//is the smallest value in mergedMin or the distfile?
float mergedMinDist = 10000;
if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist; }
if (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
//save first one
if (prevDistance == -1) { prevDistance = distance; }
if (distance != -1) { //-1 means skip me
seqDist temp(firstName, secondName, distance);
sameSeqs.push_back(temp);
- }
+ }else{ distance = 10000; }
}
if (mergedMinDist < distance) { //get minimum distance from mergedMin
//get entry
while (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
if (prevDistance == -1) { prevDistance = distance; }
return temp;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getSeqsAN");
+ m->errorOut(e, "HCluster", "getSeqsAN");
exit(1);
}
}
/***********************************************************************/
-void HCluster::combineFile() {
+int HCluster::combineFile() {
try {
- int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
- char* inputBuffer;
- inputBuffer = new char[bufferSize];
- size_t numRead;
+ //int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
+ //char* inputBuffer;
+ //inputBuffer = new char[bufferSize];
+ //size_t numRead;
string tempDistFile = distfile + ".temp";
ofstream out;
- openOutputFile(tempDistFile, out);
+ m->openOutputFile(tempDistFile, out);
- FILE* in;
- in = fopen(distfile.c_str(), "rb");
+ //FILE* in;
+ //in = fopen(distfile.c_str(), "rb");
+ ifstream in;
+ m->openInputFile(distfile, in, "no error");
+
int first, second;
float dist;
//go through file pulling out distances related to rows merging
//if mergedMin contains distances add those back into file
- bool done = false;
- partialDist = "";
- while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
+ //bool done = false;
+ //partialDist = "";
+ //while ((numRead = fread(inputBuffer, 1, bufferSize, in)) != 0) {
//cout << "number of char read = " << numRead << endl;
//cout << inputBuffer << endl;
- if (numRead < bufferSize) { done = true; }
+ //if (numRead < bufferSize) { done = true; }
//parse input into individual distances
- int spot = 0;
- string outputString = "";
- while(spot < numRead) {
+ //int spot = 0;
+ //string outputString = "";
+ //while(spot < numRead) {
//cout << "spot = " << spot << endl;
- seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
+ // seqDist nextDist = getNextDist(inputBuffer, spot, bufferSize);
//you read a partial distance
- if (nextDist.seq1 == -1) { break; }
-
- first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
+ // if (nextDist.seq1 == -1) { break; }
+ while (!in.eof()) {
+ //first = nextDist.seq1; second = nextDist.seq2; dist = nextDist.dist;
//cout << "next distance = " << first << '\t' << second << '\t' << dist << endl;
//since file is sorted and mergedMin is sorted
//you can put the smallest distance from each through the code below and keep the file sorted
+ in >> first >> second >> dist; m->gobble(in);
+
+ if (m->control_pressed) { in.close(); out.close(); remove(tempDistFile.c_str()); return 0; }
+
//while there are still values in mergedMin that are smaller than the distance read from file
while (count < mergedMin.size()) {
}else if (mergedMin[count].seq2 == smallRow) {
smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
}else { //if no, write to temp file
- outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
+ //outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
+ out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
}
count++;
}else{ break; }
smallRowColValues[0][first] = dist;
}else { //if no, write to temp file
- outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
+ //outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
+ out << first << '\t' << second << '\t' << dist << endl;
}
}
- out << outputString;
- if(done) { break; }
- }
- fclose(in);
+ //out << outputString;
+ //if(done) { break; }
+ //}
+ //fclose(in);
+ in.close();
//if values in mergedMin are larger than the largest in file then
while (count < mergedMin.size()) {
//rename tempfile to distfile
remove(distfile.c_str());
rename(tempDistFile.c_str(), distfile.c_str());
-
+//cout << "remove = "<< renameOK << " rename = " << ok << endl;
+
//merge clustered rows averaging the distances
map<int, float>::iterator itMerge;
map<int, float>::iterator it2Merge;
float average;
if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
- //weighted average
- int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
- average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+ //average
+ if (method == "average") {
+ int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+ average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+ }else { //weighted
+ average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;
+ }
+
smallRowColValues[1].erase(it2Merge);
seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
//sort merged values
sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance);
+
+ return 0;
}
catch(exception& e) {
- errorOut(e, "HCluster", "combineFile");
+ m->errorOut(e, "HCluster", "combineFile");
exit(1);
}
}
-/***********************************************************************/
+/***********************************************************************
seqDist HCluster::getNextDist(char* buffer, int& index, int size){
try {
seqDist next;
if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows
gotDist = true;
- //gobble space
+ //gobble space
while (index < size) {
if (isspace(buffer[index])) { index++; }
else { break; }
return next;
}
catch(exception& e) {
- errorOut(e, "HCluster", "getNextDist");
+ m->errorOut(e, "HCluster", "getNextDist");
exit(1);
}
}
/***********************************************************************/
-void HCluster::processFile() {
+int HCluster::processFile() {
try {
string firstName, secondName;
float distance;
ifstream in;
- openInputFile(distfile, in);
+ m->openInputFile(distfile, in, "no error");
ofstream out;
string outTemp = distfile + ".temp";
- openOutputFile(outTemp, out);
+ m->openOutputFile(outTemp, out);
//get entry
while (!in.eof()) {
+ if (m->control_pressed) { in.close(); out.close(); remove(outTemp.c_str()); return 0; }
- in >> firstName >> secondName >> distance; gobble(in);
+ in >> firstName >> secondName >> distance; m->gobble(in);
map<string,int>::iterator itA = nameMap->find(firstName);
map<string,int>::iterator itB = nameMap->find(secondName);
remove(distfile.c_str());
rename(outTemp.c_str(), distfile.c_str());
- firstRead = false;
+ return 0;
}
catch(exception& e) {
- errorOut(e, "HCluster", "processFile");
+ m->errorOut(e, "HCluster", "processFile");
exit(1);
}
}