else {
//valid parameters for this command
- string Array[] = {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+ string Array[] = {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
hclusterWanted = isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ hard = isTrue(temp);
}
}
list = new ListVector(nameMap->getListVector());
RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+ if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
+
start = time(NULL);
oldList = *list;
+ map<string, int> Seq2Bin;
+ map<string, int> oldSeq2Bin;
if (method == "furthest") { tag = "fn"; }
else if (method == "nearest") { tag = "nn"; }
openOutputFile(fileroot+ tag + ".rabund", rabundFile);
openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ if (m->control_pressed) {
+ delete nameMap; delete read; delete list; delete rabund;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
+
if (!hclusterWanted) {
//get distmatrix and overlap
SparseMatrix* distMatrix = read->getDistMatrix();
else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
cluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
+
//cluster using cluster classes
while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
cluster->update(cutoff);
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
+
float dist = distMatrix->getSmallDist();
- float rndDist = roundDist(dist, precision);
+ float rndDist;
+ if (hard) {
+ rndDist = ceilDist(dist, precision);
+ }else{
+ rndDist = roundDist(dist, precision);
+ }
if(previousDist <= 0.0000 && dist != previousDist){
oldList.setLabel("unique");
}
else if(rndDist != rndPreviousDist){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
+
temp->setLabel(toString(rndPreviousDist, precisionLength-1));
printData(temp);
delete temp;
printData(&oldList);
}
}
-
+
previousDist = dist;
rndPreviousDist = rndDist;
oldList = *list;
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
}
if(previousDist <= 0.0000){
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
+
temp->setLabel(toString(rndPreviousDist, precisionLength-1));
printData(temp);
delete temp;
//sort the distance and overlap files
sortHclusterFiles(distFile, overlapFile);
+
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
//create cluster
hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
hcluster->setMapWanted(true);
+ // Take the initial sequence-to-bin snapshot from the HCluster object created just above.
+ // The previous code read it through `cluster`, which in the visible code is only
+ // constructed on the !hclusterWanted path, so dereferencing it here is not safe.
+ Seq2Bin = hcluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
vector<seqDist> seqs; seqs.resize(1); // to start loop
//ifstream inHcluster;
//openInputFile(distFile, inHcluster);
+
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund; delete hcluster;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ return 0;
+ }
while (seqs.size() != 0){
seqs = hcluster->getSeqs();
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund; delete hcluster;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ remove(distFile.c_str());
+ remove(overlapFile.c_str());
+ return 0;
+ }
+
for (int i = 0; i < seqs.size(); i++) { //-1 means skip me
if (seqs[i].seq1 != seqs[i].seq2) {
hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund; delete hcluster;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ remove(distFile.c_str());
+ remove(overlapFile.c_str());
+ return 0;
+ }
- float rndDist = roundDist(seqs[i].dist, precision);
+ float rndDist;
+ if (hard) {
+ rndDist = ceilDist(seqs[i].dist, precision);
+ }else{
+ rndDist = roundDist(seqs[i].dist, precision);
+ }
if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
oldList.setLabel("unique");
}
else if((rndDist != rndPreviousDist)){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ remove(distFile.c_str());
+ remove(overlapFile.c_str());
+ return 0;
+ }
+
temp->setLabel(toString(rndPreviousDist, precisionLength-1));
printData(temp);
delete temp;
previousDist = seqs[i].dist;
rndPreviousDist = rndDist;
oldList = *list;
+ // Refresh the snapshot from `hcluster`, the object this loop updates; the removed
+ // code at the merge sites also read the map from `hcluster`, not `cluster`.
+ Seq2Bin = hcluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
}
}
}
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
+
+ if (m->control_pressed) {
+ delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ remove(distFile.c_str());
+ remove(overlapFile.c_str());
+ return 0;
+ }
+
temp->setLabel(toString(rndPreviousDist, precisionLength-1));
printData(temp);
delete temp;
globaldata->setListFile(fileroot+ tag + ".list");
globaldata->setFormat("list");
+ if (m->control_pressed) {
+ delete nameMap;
+ listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ globaldata->setListFile("");
+ globaldata->setFormat("");
+ return 0;
+ }
+
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();
try {
//create new listvector so you don't overwrite the clustering
ListVector* newList = new ListVector(oldList);
+
bool done = false;
ifstream inOverlap;
int count = 0;
}else { if (overlapMatrix.size() == 0) { done = true; } }
while (!done) {
+ if (m->control_pressed) {
+ if (hclusterWanted) { inOverlap.close(); }
+ return newList;
+ }
//get next overlap
seqDist overlapNode;
float overlapDistance;
inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
- map<string,int>::iterator itA = nameMap->find(firstName);
- map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
- if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
+ //commented out because we check this in readblast already
+ //map<string,int>::iterator itA = nameMap->find(firstName);
+ //map<string,int>::iterator itB = nameMap->find(secondName);
+ //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
+ //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
- overlapNode.seq1 = itA->second;
- overlapNode.seq2 = itB->second;
+ //overlapNode.seq1 = itA->second;
+ //overlapNode.seq2 = itB->second;
+ overlapNode.seq1 = nameMap->get(firstName);
+ overlapNode.seq2 = nameMap->get(secondName);
overlapNode.dist = overlapDistance;
}else { inOverlap.close(); break; }
}
//get names of seqs that overlap
string name1 = nameMap->get(overlapNode.seq1);
string name2 = nameMap->get(overlapNode.seq2);
-
+
//use binInfo to find out if they are already in the same bin
+ //map<string, int>::iterator itBin1 = binInfo.find(name1);
+ //map<string, int>::iterator itBin2 = binInfo.find(name2);
+
+ //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
+ //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
+
+ //int binKeep = itBin1->second;
+ //int binRemove = itBin2->second;
+
int binKeep = binInfo[name1];
int binRemove = binInfo[name2];
-
+
//if not, merge bins and update binInfo
if(binKeep != binRemove) {
+
//save names in old bin
- string names = list->get(binRemove);
-
+ string names = newList->get(binRemove);
+
//merge bins into name1s bin
newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
newList->set(binRemove, "");
void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
try {
//sort distFile
- string sortedDistFile = sortFile(unsortedDist);
+ string sortedDistFile = sortFile(unsortedDist, outputDir);
remove(unsortedDist.c_str()); //delete unsorted file
distFile = sortedDistFile;
//sort overlap file
- string sortedOverlapFile = sortFile(unsortedOverlap);
+ string sortedOverlapFile = sortFile(unsortedOverlap, outputDir);
remove(unsortedOverlap.c_str()); //delete unsorted file
overlapFile = sortedOverlapFile;
}