#include "mgclustercommand.h"
+
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getValidParameters(){
+ try {
+ string Array[] = {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "getValidParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MGClusterCommand::MGClusterCommand(){
+ try {
+ abort = true;
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getRequiredParameters(){
+ try {
+ string Array[] = {"blast"};
+ vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "getRequiredParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<string> MGClusterCommand::getRequiredFiles(){
+ try {
+ vector<string> myArray;
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "getRequiredFiles");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
MGClusterCommand::MGClusterCommand(string option) {
try {
else {
//valid paramters for this command
- string Array[] = {"blast", "method", "name", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
+ string Array[] = {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
}
+ //initialize outputTypes
+ vector<string> tempOutNames;
+ outputTypes["list"] = tempOutNames;
+ outputTypes["rabund"] = tempOutNames;
+ outputTypes["sabund"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
if (inputDir == "not found"){ inputDir = ""; }
it = parameters.find("blast");
//user has given a template file
if(it != parameters.end()){
- path = hasPath(it->second);
+ path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["blast"] = inputDir + it->second; }
}
it = parameters.find("name");
//user has given a template file
if(it != parameters.end()){
- path = hasPath(it->second);
+ path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
outputDir = "";
- outputDir += hasPath(blastfile); //if user entered a file with a path then preserve it
+ outputDir += m->hasPath(blastfile); //if user entered a file with a path then preserve it
}
namefile = validParameter.validFile(parameters, "name", true);
convert(temp, penalty);
temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
- minWanted = isTrue(temp);
+ minWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "merge", false); if (temp == "not found") { temp = "true"; }
- merge = isTrue(temp);
+ merge = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
- hclusterWanted = isTrue(temp);
+ hclusterWanted = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
+ hard = m->isTrue(temp);
}
}
nameMap->readMap();
}else{ nameMap= new NameAssignment(); }
- string fileroot = outputDir + getRootName(getSimpleName(blastfile));
+ string fileroot = outputDir + m->getRootName(m->getSimpleName(blastfile));
string tag = "";
time_t start;
float previousDist = 0.00000;
list = new ListVector(nameMap->getListVector());
RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
- if (m->control_pressed) { delete nameMap; delete read; delete list; delete rabund; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
start = time(NULL);
oldList = *list;
+ map<string, int> Seq2Bin;
+ map<string, int> oldSeq2Bin;
if (method == "furthest") { tag = "fn"; }
else if (method == "nearest") { tag = "nn"; }
else { tag = "an"; }
//open output files
- openOutputFile(fileroot+ tag + ".list", listFile);
- openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ m->openOutputFile(fileroot+ tag + ".list", listFile);
+ m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
+ m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
if (m->control_pressed) {
delete nameMap; delete read; delete list; delete rabund;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
else if(method == "nearest"){ cluster = new SingleLinkage(rabund, list, distMatrix, cutoff, method); }
else if(method == "average"){ cluster = new AverageLinkage(rabund, list, distMatrix, cutoff, method); }
cluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
-
+
//cluster using cluster classes
while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
float dist = distMatrix->getSmallDist();
- float rndDist = roundDist(dist, precision);
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(dist, precision);
+ }else{
+ rndDist = m->roundDist(dist, precision);
+ }
if(previousDist <= 0.0000 && dist != previousDist){
oldList.setLabel("unique");
}
else if(rndDist != rndPreviousDist){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
printData(&oldList);
}
}
-
+
previousDist = dist;
rndPreviousDist = rndDist;
oldList = *list;
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
}
if(previousDist <= 0.0000){
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = cluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
//create cluster
hcluster = new HCluster(rabund, list, method, distFile, nameMap, cutoff);
hcluster->setMapWanted(true);
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
vector<seqDist> seqs; seqs.resize(1); // to start loop
//ifstream inHcluster;
- //openInputFile(distFile, inHcluster);
+ //m->openInputFile(distFile, inHcluster);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ outputTypes.clear();
return 0;
}
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
remove(distFile.c_str());
remove(overlapFile.c_str());
+ outputTypes.clear();
return 0;
}
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
remove(distFile.c_str());
remove(overlapFile.c_str());
+ outputTypes.clear();
return 0;
}
- float rndDist = roundDist(seqs[i].dist, precision);
+ float rndDist;
+ if (hard) {
+ rndDist = m->ceilDist(seqs[i].dist, precision);
+ }else{
+ rndDist = m->roundDist(seqs[i].dist, precision);
+ }
if((previousDist <= 0.0000) && (seqs[i].dist != previousDist)){
oldList.setLabel("unique");
}
else if((rndDist != rndPreviousDist)){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
remove(distFile.c_str());
remove(overlapFile.c_str());
+ outputTypes.clear();
return 0;
}
previousDist = seqs[i].dist;
rndPreviousDist = rndDist;
oldList = *list;
+ Seq2Bin = cluster->getSeqtoBin();
+ oldSeq2Bin = Seq2Bin;
}
}
}
}
else if(rndPreviousDist<cutoff){
if (merge) {
- map<string, int> seq2Bin = hcluster->getSeqtoBin();
- ListVector* temp = mergeOPFs(seq2Bin, rndPreviousDist);
+ ListVector* temp = mergeOPFs(oldSeq2Bin, rndPreviousDist);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
remove(distFile.c_str());
remove(overlapFile.c_str());
+ outputTypes.clear();
return 0;
}
listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
globaldata->setListFile("");
globaldata->setFormat("");
+ outputTypes.clear();
return 0;
}
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine();
- m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine();
- m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine();
+ m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
+ m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
+ m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
try {
//create new listvector so you don't overwrite the clustering
ListVector* newList = new ListVector(oldList);
+
bool done = false;
ifstream inOverlap;
int count = 0;
if (hclusterWanted) {
- openInputFile(overlapFile, inOverlap);
+ m->openInputFile(overlapFile, inOverlap);
if (inOverlap.eof()) { done = true; }
}else { if (overlapMatrix.size() == 0) { done = true; } }
if (!inOverlap.eof()) {
string firstName, secondName;
float overlapDistance;
- inOverlap >> firstName >> secondName >> overlapDistance; gobble(inOverlap);
+ inOverlap >> firstName >> secondName >> overlapDistance; m->gobble(inOverlap);
- map<string,int>::iterator itA = nameMap->find(firstName);
- map<string,int>::iterator itB = nameMap->find(secondName);
- if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
- if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
+ //commented out because we check this in readblast already
+ //map<string,int>::iterator itA = nameMap->find(firstName);
+ //map<string,int>::iterator itB = nameMap->find(secondName);
+ //if(itA == nameMap->end()){ cerr << "AAError: Sequence '" << firstName << "' was not found in the names file, please correct\n"; exit(1); }
+ //if(itB == nameMap->end()){ cerr << "ABError: Sequence '" << secondName << "' was not found in the names file, please correct\n"; exit(1); }
- overlapNode.seq1 = itA->second;
- overlapNode.seq2 = itB->second;
+ //overlapNode.seq1 = itA->second;
+ //overlapNode.seq2 = itB->second;
+ overlapNode.seq1 = nameMap->get(firstName);
+ overlapNode.seq2 = nameMap->get(secondName);
overlapNode.dist = overlapDistance;
}else { inOverlap.close(); break; }
}
//get names of seqs that overlap
string name1 = nameMap->get(overlapNode.seq1);
string name2 = nameMap->get(overlapNode.seq2);
-
+
//use binInfo to find out if they are already in the same bin
+ //map<string, int>::iterator itBin1 = binInfo.find(name1);
+ //map<string, int>::iterator itBin2 = binInfo.find(name2);
+
+ //if(itBin1 == binInfo.end()){ cerr << "AAError: Sequence '" << name1 << "' does not have any bin info.\n"; exit(1); }
+ //if(itBin2 == binInfo.end()){ cerr << "ABError: Sequence '" << name2 << "' does not have any bin info.\n"; exit(1); }
+
+ //int binKeep = itBin1->second;
+ //int binRemove = itBin2->second;
+
int binKeep = binInfo[name1];
int binRemove = binInfo[name2];
-
+
//if not merge bins and update binInfo
if(binKeep != binRemove) {
+
//save names in old bin
- string names = list->get(binRemove);
-
+ string names = newList->get(binRemove);
+
//merge bins into name1s bin
newList->set(binKeep, newList->get(binRemove)+','+newList->get(binKeep));
newList->set(binRemove, "");
void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOverlap) {
try {
//sort distFile
- string sortedDistFile = sortFile(unsortedDist);
+ string sortedDistFile = m->sortFile(unsortedDist, outputDir);
remove(unsortedDist.c_str()); //delete unsorted file
distFile = sortedDistFile;
//sort overlap file
- string sortedOverlapFile = sortFile(unsortedOverlap);
+ string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
remove(unsortedOverlap.c_str()); //delete unsorted file
overlapFile = sortedOverlapFile;
}