clusterArray.push_back(temp);
}
- if (method != "average") {
- openInputFile(distfile, filehandle);
+ if ((method == "furthest") || (method == "nearest")) {
+ m->openInputFile(distfile, filehandle);
}else{
processFile();
}
//you don't want to cluster with yourself
if (smallRow != smallCol) {
- if (method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
//can we cluster???
if (method == "nearest") { cluster = true; }
else{ //assume furthest
try {
vector<seqDist> sameSeqs;
- if(method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
sameSeqs = getSeqsFNNN();
}else{
sameSeqs = getSeqsAN();
//get entry
while (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
//save first one
if (prevDistance == -1) { prevDistance = distance; }
vector<seqDist> sameSeqs;
prevDistance = -1;
- openInputFile(distfile, filehandle, "no error");
+ m->openInputFile(distfile, filehandle, "no error");
//is the smallest value in mergedMin or the distfile?
float mergedMinDist = 10000;
if (mergedMin.size() > 0) { mergedMinDist = mergedMin[0].dist; }
if (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
//save first one
if (prevDistance == -1) { prevDistance = distance; }
if (distance != -1) { //-1 means skip me
seqDist temp(firstName, secondName, distance);
sameSeqs.push_back(temp);
- }
+ }else{ distance = 10000; }
}
if (mergedMinDist < distance) { //get minimum distance from mergedMin
//get entry
while (!filehandle.eof()) {
- filehandle >> firstName >> secondName >> distance; gobble(filehandle);
+ filehandle >> firstName >> secondName >> distance; m->gobble(filehandle);
if (prevDistance == -1) { prevDistance = distance; }
}
/***********************************************************************/
-void HCluster::combineFile() {
+int HCluster::combineFile() {
try {
//int bufferSize = 64000; //512k - this should be a variable that the user can set to optimize code to their hardware
//char* inputBuffer;
string tempDistFile = distfile + ".temp";
ofstream out;
- openOutputFile(tempDistFile, out);
+ m->openOutputFile(tempDistFile, out);
//FILE* in;
//in = fopen(distfile.c_str(), "rb");
ifstream in;
- openInputFile(distfile, in);
+ m->openInputFile(distfile, in, "no error");
int first, second;
float dist;
//since file is sorted and mergedMin is sorted
//you can put the smallest distance from each through the code below and keep the file sorted
- in >> first >> second >> dist; gobble(in);
+ in >> first >> second >> dist; m->gobble(in);
+
+ if (m->control_pressed) { in.close(); out.close(); remove(tempDistFile.c_str()); return 0; }
//while there are still values in mergedMin that are smaller than the distance read from file
while (count < mergedMin.size()) {
mergedMin.clear();
//rename tempfile to distfile
- int renameOK = remove(distfile.c_str());
- int ok = rename(tempDistFile.c_str(), distfile.c_str());
+ remove(distfile.c_str());
+ rename(tempDistFile.c_str(), distfile.c_str());
//cout << "remove = "<< renameOK << " rename = " << ok << endl;
//merge clustered rows averaging the distances
float average;
if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
- //weighted average
- int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
- average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+ //average: mean of the two distances weighted by each cluster's numSeq
+ if (method == "average") {
+ int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+ average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+ }else { //weighted
+ average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;
+ }
+
smallRowColValues[1].erase(it2Merge);
seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
//sort merged values
sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance);
+
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "HCluster", "combineFile");
if ((buffer[index] == 10) || (buffer[index] == 13)) { //newline in unix or windows
gotDist = true;
- //gobble space
+ //gobble whitespace
while (index < size) {
if (isspace(buffer[index])) { index++; }
else { break; }
}
}
/***********************************************************************/
-void HCluster::processFile() {
+int HCluster::processFile() {
try {
string firstName, secondName;
float distance;
ifstream in;
- openInputFile(distfile, in);
+ m->openInputFile(distfile, in, "no error");
ofstream out;
string outTemp = distfile + ".temp";
- openOutputFile(outTemp, out);
+ m->openOutputFile(outTemp, out);
//get entry
while (!in.eof()) {
+ if (m->control_pressed) { in.close(); out.close(); remove(outTemp.c_str()); return 0; }
- in >> firstName >> secondName >> distance; gobble(in);
+ in >> firstName >> secondName >> distance; m->gobble(in);
map<string,int>::iterator itA = nameMap->find(firstName);
map<string,int>::iterator itB = nameMap->find(secondName);
remove(distfile.c_str());
rename(outTemp.c_str(), distfile.c_str());
+
+ return 0;
}
catch(exception& e) {
m->errorOut(e, "HCluster", "processFile");