//**********************************************************************************************************************
vector<string> MGClusterCommand::setParameters(){
try {
- CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
- CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
- CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
- CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
- CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
- CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
- CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
- CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
- CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast); // NOTE(review): the added 8th argument ("list" here) looks like the output type tied to this input - confirm against CommandParameter's constructor
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname); // name and count both carry "NameCount" - presumably a choose-one group; the constructor aborts when both files are supplied
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount); // new count input, handled as an alternative to name
+ CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
+ CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
+ CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
+ CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
+ CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
+ CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
+ CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
+ CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
+ CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
}
}
//**********************************************************************************************************************
+string MGClusterCommand::getOutputPattern(string type) {
+    try {
+        // Each known output type maps to a fixed file-name template; return it directly.
+        // The "list" entry holds two '-'-separated templates, the second adding [tag2]
+        // (set by execute() only for count-file runs).
+        // NOTE(review): presumably getOutputFileName picks between them - confirm.
+        if (type == "list")   { return "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; }
+        if (type == "rabund") { return "[filename],[clustertag],rabund"; }
+        if (type == "sabund") { return "[filename],[clustertag],sabund"; }
+
+        // Unknown type: report it, raise the interrupt flag and hand back an empty pattern.
+        m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n");
+        m->control_pressed = true;
+        return "";
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MGClusterCommand", "getOutputPattern");
+        exit(1);
+    }
+}
+//*******************************************************************************************************************
MGClusterCommand::MGClusterCommand(){
try {
abort = true; calledHelp = true;
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;} // "citation" is treated like "help": print it and leave the command aborted
else {
vector<string> myArray = setParameters();
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a count file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
+ else { m->setNameFile(namefile); } // NOTE(review): presumably records this as the session's current name file - confirm
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; }
+ else if (countfile == "not found") { countfile = ""; } // count is optional: an empty string means "no count file" everywhere below
+ else { m->setCountTableFile(countfile); }
+
+ if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; } // name and count are alternatives; supplying both aborts the command
if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
string temp;
- temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
+ temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
precisionLength = temp.length();
- convert(temp, precision);
+ m->mothurConvert(temp, precision);
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
- convert(temp, cutoff);
+ m->mothurConvert(temp, cutoff);
cutoff += (5 / (precision * 10.0));
method = validParameter.validFile(parameters, "method", false);
else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
- convert(temp, length);
+ m->mothurConvert(temp, length);
temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
- convert(temp, penalty);
+ m->mothurConvert(temp, penalty);
temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
minWanted = m->isTrue(temp);
hclusterWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
- hard = m->isTrue(temp);
+ hard = m->isTrue(temp);
}
}
//**********************************************************************************************************************
int MGClusterCommand::execute(){
try {
-
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//read names file
string tag = "";
time_t start;
float previousDist = 0.00000;
- float rndPreviousDist = 0.00000;
-
+ float rndPreviousDist = 0.00000;
+
//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
//must remember to delete those objects here since readBlast does not
read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
read->read(nameMap);
-
- list = new ListVector(nameMap->getListVector());
- RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+
+ list = new ListVector(nameMap->getListVector());
+ RAbundVector* rabund = NULL;
+
+ if(countfile != "") {
+ //map<string, int> nameMapCounts = m->readNames(namefile);
+ ct = new CountTable();
+ ct->readTable(countfile, false);
+ rabund = new RAbundVector();
+ createRabund(ct, list, rabund);
+ }else {
+ rabund = new RAbundVector(list->getRAbundVector());
+ }
+
+
+ //list = new ListVector(nameMap->getListVector());
+ //rabund = new RAbundVector(list->getRAbundVector());
if (m->control_pressed) { outputTypes.clear(); delete nameMap; delete read; delete list; delete rabund; return 0; }
else if (method == "nearest") { tag = "nn"; }
else { tag = "an"; }
- //open output files
- m->openOutputFile(fileroot+ tag + ".list", listFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ map<string, string> variables;
+ variables["[filename]"] = fileroot;
+ variables["[clustertag]"] = tag;
+ string sabundFileName = getOutputFileName("sabund", variables);
+ string rabundFileName = getOutputFileName("rabund", variables);
+ if (countfile != "") { variables["[tag2]"] = "unique_list"; }
+ string listFileName = getOutputFileName("list", variables);
+
+ if (countfile == "") {
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ }
+ m->openOutputFile(listFileName, listFile);
if (m->control_pressed) {
delete nameMap; delete read; delete list; delete rabund;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
+ double saveCutoff = cutoff;
+
if (!hclusterWanted) {
//get distmatrix and overlap
- SparseMatrix* distMatrix = read->getDistMatrix();
+ SparseDistanceMatrix* distMatrix = read->getDistMatrix();
overlapMatrix = read->getOverlapMatrix(); //already sorted by read
delete read;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
-
+
+
//cluster using cluster classes
while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
+ if (m->debug) { cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
+
cluster->update(cutoff);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
seqs = hcluster->getSeqs();
+ //to account for cutoff change in average neighbor
+ if (seqs.size() != 0) {
+ if (seqs[0].dist > cutoff) { break; }
+ }
+
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
outputTypes.clear();
return 0;
}
if (seqs[i].seq1 != seqs[i].seq2) {
- hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+ cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
outputTypes.clear();
return 0;
}
if (m->control_pressed) {
delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
outputTypes.clear();
return 0;
}
}
delete hcluster;
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
}
- delete list;
+ delete list;
delete rabund;
listFile.close();
- sabundFile.close();
- rabundFile.close();
-
+ if (countfile == "") {
+ sabundFile.close();
+ rabundFile.close();
+ }
if (m->control_pressed) {
delete nameMap;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
outputTypes.clear();
return 0;
}
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
- m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+ m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+ if (countfile == "") {
+ m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ }
m->mothurOutEndLine();
+ if (saveCutoff != cutoff) {
+ if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
+ else { saveCutoff = m->roundDist(saveCutoff, precision); }
+
+ m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
+ }
+
//set list file as new current listfile
string current = "";
itTypes = outputTypes.find("list");
void MGClusterCommand::printData(ListVector* mergedList){
try {
mergedList->print(listFile);
- mergedList->getRAbundVector().print(rabundFile);
-
- SAbundVector sabund = mergedList->getSAbundVector();
+ SAbundVector sabund = mergedList->getSAbundVector();
+
+ if (countfile == "") { // execute() opens rabundFile/sabundFile only when no count file is given, so write to them only in that case
+ mergedList->getRAbundVector().print(rabundFile);
+ sabund.print(sabundFile);
+ }
sabund.print(cout);
- sabund.print(sabundFile);
}
catch(exception& e) {
m->errorOut(e, "MGClusterCommand", "printData");
try {
//sort distFile
string sortedDistFile = m->sortFile(unsortedDist, outputDir);
- remove(unsortedDist.c_str()); //delete unsorted file
+ m->mothurRemove(unsortedDist); //delete unsorted file
distFile = sortedDistFile;
//sort overlap file
string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
- remove(unsortedOverlap.c_str()); //delete unsorted file
+ m->mothurRemove(unsortedOverlap); //delete unsorted file
overlapFile = sortedOverlapFile;
}
catch(exception& e) {
//**********************************************************************************************************************
+void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
+    try {
+        // Appends one abundance per bin of `list` to `rabund`, in bin order.
+        // A bin's abundance is the sum of ct->getNumSeqs() over the
+        // comma-separated names stored in that bin.
+        for (int binIndex = 0; binIndex < list->getNumBins(); binIndex++) {
+            string binLabel = list->get(binIndex);
+            vector<string> members;
+            m->splitAtComma(binLabel, members);
+
+            int abundance = 0;
+            for (vector<string>::iterator member = members.begin(); member != members.end(); ++member) {
+                abundance += ct->getNumSeqs(*member);
+            }
+
+            rabund->push_back(abundance);
+        }
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MGClusterCommand", "createRabund");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************