clusterArray.push_back(temp);
}
- if (method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
m->openInputFile(distfile, filehandle);
}else{
processFile();
}
}
/***********************************************************************/
-bool HCluster::update(int row, int col, float distance){
+double HCluster::update(int row, int col, float distance){
try {
bool cluster = false;
smallRow = row;
//you don't want to cluster with yourself
if (smallRow != smallCol) {
- if (method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
//can we cluster???
if (method == "nearest") { cluster = true; }
else{ //assume furthest
}
}
- return cluster;
+ return cutoff;
//printInfo();
}
catch(exception& e) {
try {
vector<seqDist> sameSeqs;
- if(method != "average") {
+ if ((method == "furthest") || (method == "nearest")) {
sameSeqs = getSeqsFNNN();
}else{
sameSeqs = getSeqsAN();
map<string,int>::iterator itA = nameMap->find(firstName);
map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
- if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
+ if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
+ if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
//using cutoff
if (distance > cutoff) { break; }
}
}
//**********************************************************************************************************************
-//don't need cutoff since processFile removes all distance above cutoff and changes names to indexes
vector<seqDist> HCluster::getSeqsAN(){
try {
int firstName, secondName;
in >> first >> second >> dist; m->gobble(in);
- if (m->control_pressed) { in.close(); out.close(); remove(tempDistFile.c_str()); return 0; }
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(tempDistFile); return 0; }
//while there are still values in mergedMin that are smaller than the distance read from file
while (count < mergedMin.size()) {
smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
}else { //if no, write to temp file
//outputString += toString(mergedMin[count].seq1) + '\t' + toString(mergedMin[count].seq2) + '\t' + toString(mergedMin[count].dist) + '\n';
- out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
+ //if (mergedMin[count].dist < cutoff) {
+ out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
+ //}
}
count++;
}else{ break; }
}else { //if no, write to temp file
//outputString += toString(first) + '\t' + toString(second) + '\t' + toString(dist) + '\n';
- out << first << '\t' << second << '\t' << dist << endl;
+ //if (dist < cutoff) {
+ out << first << '\t' << second << '\t' << dist << endl;
+ //}
}
}
smallRowColValues[0][mergedMin[count].seq1] = mergedMin[count].dist;
}else { //if no, write to temp file
- out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
+ //if (mergedMin[count].dist < cutoff) {
+ out << mergedMin[count].seq1 << '\t' << mergedMin[count].seq2 << '\t' << mergedMin[count].dist << endl;
+ //}
}
count++;
}
mergedMin.clear();
//rename tempfile to distfile
- remove(distfile.c_str());
+ m->mothurRemove(distfile);
rename(tempDistFile.c_str(), distfile.c_str());
//cout << "remove = "<< renameOK << " rename = " << ok << endl;
float average;
if (it2Merge != smallRowColValues[1].end()) { //if yes, then average
- //weighted average
- int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
- average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
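+ //"average" weights each cluster's distance by its sequence count; any other method (weighted) takes the plain mean of the two distances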
+ if (method == "average") {
+ int total = clusterArray[smallRow].numSeq + clusterArray[smallCol].numSeq;
+ average = ((clusterArray[smallRow].numSeq * itMerge->second) + (clusterArray[smallCol].numSeq * it2Merge->second)) / (float) total;
+ }else { //weighted
+ average = ((itMerge->second * 1.0) + (it2Merge->second * 1.0)) / (float) 2.0;
+ }
+
smallRowColValues[1].erase(it2Merge);
seqDist temp(clusterArray[smallRow].parent, itMerge->first, average);
mergedMin.push_back(temp);
+ }else {
+ //no matching entry in smallRowColValues[1] for this distance, so lower cutoff to it if it is smaller than the current cutoff
+ if (cutoff > itMerge->second) { cutoff = itMerge->second; }
}
}
-
+
+ //lower cutoff to the smallest distance still remaining in smallRowColValues[1]
+ for(itMerge = smallRowColValues[1].begin(); itMerge != smallRowColValues[1].end(); itMerge++) {
+ if (cutoff > itMerge->second) { cutoff = itMerge->second; }
+ }
+
//sort merged values
sort(mergedMin.begin(), mergedMin.end(), compareSequenceDistance);
//get entry
while (!in.eof()) {
- if (m->control_pressed) { in.close(); out.close(); remove(outTemp.c_str()); return 0; }
+ if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outTemp); return 0; }
in >> firstName >> secondName >> distance; m->gobble(in);
map<string,int>::iterator itA = nameMap->find(firstName);
map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
- if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
+ if(itA == nameMap->end()){ m->mothurOut("AAError: Sequence '" + firstName + "' was not found in the names file, please correct\n"); exit(1); }
+ if(itB == nameMap->end()){ m->mothurOut("ABError: Sequence '" + secondName + "' was not found in the names file, please correct\n"); exit(1); }
//using cutoff
if (distance > cutoff) { break; }
in.close();
out.close();
- remove(distfile.c_str());
+ m->mothurRemove(distfile);
rename(outTemp.c_str(), distfile.c_str());
return 0;