vector<string> listFileNames;
set<string> labels;
string singletonName = "";
+ double saveCutoff = cutoff;
//****************** file prep work ******************************//
#ifdef USE_MPI
for(int i = 1; i < processors; i++) {
int num = dividedNames[i].size();
+ double tempCutoff;
+ MPI_Recv(&tempCutoff, 1, MPI_DOUBLE, i, tag, MPI_COMM_WORLD, &status);
+ if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+
//send list filenames to root process
for (int j = 0; j < num; j++) {
int lengthList = 0;
//process them
listFileNames = cluster(myNames, labels);
+ //send cutoff
+ MPI_Send(&cutoff, 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD);
+
//send list filenames to root process
for (int j = 0; j < num; j++) {
char tempListFileName[1024];
ifstream in2;
openInputFile(filename, in2);
+ float tempCutoff;
+ in2 >> tempCutoff; gobble(in2);
+ if (tempCutoff < cutoff) { cutoff = tempCutoff; }
+
while(!in2.eof()) {
string tempName;
in2 >> tempName; gobble(in2);
#endif
if (m->control_pressed) { for (int i = 0; i < listFileNames.size(); i++) { remove(listFileNames[i].c_str()); } return 0; }
+ if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(saveCutoff) + " changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); }
+
m->mothurOut("It took " + toString(time(NULL) - estart) + " seconds to cluster"); m->mothurOutEndLine();
//****************** merge list file and create rabund and sabund files ******************************//
if ((*it != "unique") && (convertTestFloat(*it, temp) == true)) { convert(*it, temp); }
else if (*it == "unique") { temp = -1.0; }
-
- orderFloat.push_back(temp);
- labelBin[temp] = numSingleBins; //initialize numbins
+
+ if (temp <= cutoff) {
+ orderFloat.push_back(temp);
+ labelBin[temp] = numSingleBins; //initialize numbins
+ }
}
//sort order
ofstream outLabels;
filename = toString(getpid()) + ".temp.labels";
openOutputFile(filename, outLabels);
-
+
+ outLabels << cutoff << endl;
for (set<string>::iterator it = labels.begin(); it != labels.end(); it++) {
outLabels << (*it) << endl;
}
vector<string> listFileNames;
+ double smallestCutoff = cutoff;
+
//cluster each distance file
for (int i = 0; i < distNames.size(); i++) {
+ if (m->control_pressed) { return listFileNames; }
string thisNamefile = distNames[i].begin()->second;
string thisDistFile = distNames[i].begin()->first;
listFileNames.clear(); return listFileNames;
}
- cluster->update(cutoff);
+ cluster->update(saveCutoff);
float dist = matrix->getSmallDist();
float rndDist;
remove(thisDistFile.c_str());
remove(thisNamefile.c_str());
+
+ if (saveCutoff != cutoff) { m->mothurOut("Cutoff was " + toString(cutoff) + " changed cutoff to " + toString(saveCutoff)); m->mothurOutEndLine(); }
+
+ if (saveCutoff < smallestCutoff) { smallestCutoff = saveCutoff; }
}
+ cutoff = smallestCutoff;
return listFileNames;
start = time(NULL);
oldList = *list;
+ map<string, int> Seq2Bin;
+ map<string, int> oldSeq2Bin;
if (method == "furthest") { tag = "fn"; }
else if (method == "nearest") { tag = "nn"; }
else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
cluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
}
else if(rndDist != rndPreviousDist){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ Seq2Bin = cluster->getSeqtoBin();
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
}
previousDist = dist;
+ cout << "prev distance = " << previousDist << " dist = " << dist << endl;
rndPreviousDist = rndDist;
oldList = *list;
+ oldSeq2Bin = Seq2Bin;
}
if(previousDist <= 0.0000){
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ Seq2Bin = cluster->getSeqtoBin();
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
//create cluster
hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
hcluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
vector<seqDist> seqs; seqs.resize(1); // to start loop
//ifstream inHcluster;
}
else if((rndDist != rndPreviousDist)){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ Seq2Bin = hcluster->getSeqtoBin();
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
previousDist = seqs[i].dist;
rndPreviousDist = rndDist;
oldList = *list;
+ oldSeq2Bin = Seq2Bin;
}
}
}
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ Seq2Bin = hcluster->getSeqtoBin();
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
}
delete hcluster;
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ //remove(distFile.c_str());
+ //remove(overlapFile.c_str());
}
delete list;
//get names of seqs that overlap
string name1 = nameMap->get(overlapNode.seq1);
string name2 = nameMap->get(overlapNode.seq2);
-
+
//use binInfo to find out if they are already in the same bin
- int binKeep = binInfo[name1];
- int binRemove = binInfo[name2];
+ map<string, int>::iterator itBin1 = binInfo.find(name1);
+ map<string, int>::iterator itBin2 = binInfo.find(name2);
+
+ if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
+ if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
+
+ int binKeep = itBin1->second;
+ int binRemove = itBin2->second;
//if not merge bins and update binInfo
if(binKeep != binRemove) {
+
//save names in old bin
- string names = list->get(binRemove);
+ string names = newList->get(binRemove);
//merge bins into name1s bin
newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
//**********************************************************************************************************************
int NameAssignment::get(string key){
-
- return (*this)[key];
-
+ try {
+ map<string, int>::iterator itGet = (*this).find(key);
+
+ //if you can't find it
+ if (itGet == (*this).end()) { return -1; }
+
+ return (*this)[key];
+ }
+ catch(exception& e) {
+ m->errorOut(e, "NameAssignment", "get");
+ exit(1);
+ }
}
//**********************************************************************************************************************
string NameAssignment::get(int key){
+ try {
- return reverse[key];
-
+ map<int, string>::iterator itGet = reverse.find(key);
+
+ if (itGet == reverse.end()) { return "not found"; }
+
+ return reverse[key];
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "NameAssignment", "get");
+ exit(1);
+ }
}
//**********************************************************************************************************************
map<int, float>::iterator itDist;
for(it=thisRowsBlastScores.begin(); it!=thisRowsBlastScores.end(); it++) {
distance = 1.0 - (it->second / refScore);
+
//do we already have the distance calculated for b->a
map<string,int>::iterator itA = nameMap->find(currentRow);
//if we have it then compare
if (itDist != dists[it->first].end()) {
+ if (distance < 0.0) { cout << currentRow << '\t' << nameMap->get(it->first) << '\t' << "score = " << it->second << " refscore = " << refScore << " distance = " << distance << " distance = " << itDist->second << endl; }
+
//if you want the minimum blast score ratio, then pick max distance
if(minWanted) { distance = max(itDist->second, distance); }
else{ distance = min(itDist->second, distance); }
-
+
//is this distance below cutoff
if (distance < cutoff) {
if (!hclusterWanted) {