try {
CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
try {
CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
- CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "furthest", "", "", "",false,false); parameters.push_back(pmethod);
- CommandParameter phard("hard", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phard);
+ CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
+ CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
helpString += "The precision parameter's default value is 100. \n";
helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
helpString += "The precision parameter's default value is 100. \n";
helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
helpString += "The length parameter is used to specify the minimum overlap required. The default is 5.\n";
helpString += "The penalty parameter is used to adjust the error rate. The default is 0.10.\n";
helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
helpString += "The mgcluster command should be in the following format: \n";
helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering. This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
helpString += "The mgcluster command should be in the following format: \n";
helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
+string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "list") { outputFileName = "list"; }
+ else if (type == "rabund") { outputFileName = "rabund"; }
+ else if (type == "sabund") { outputFileName = "sabund"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not open") { abort = true; }
else if (namefile == "not found") { namefile = ""; }
+ else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
string temp;
if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
//check for optional parameter and set defaults
string temp;
- temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
+ temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; }
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0.70"; }
cutoff += (5 / (precision * 10.0));
method = validParameter.validFile(parameters, "method", false);
cutoff += (5 / (precision * 10.0));
method = validParameter.validFile(parameters, "method", false);
if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
else { m->mothurOut("Not a valid clustering method. Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
temp = validParameter.validFile(parameters, "length", false); if (temp == "not found") { temp = "5"; }
temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
temp = validParameter.validFile(parameters, "penalty", false); if (temp == "not found") { temp = "0.10"; }
temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
minWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "min", false); if (temp == "not found") { temp = "true"; }
minWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
hclusterWanted = m->isTrue(temp);
temp = validParameter.validFile(parameters, "hcluster", false); if (temp == "not found") { temp = "false"; }
hclusterWanted = m->isTrue(temp);
- temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "F"; }
- hard = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "hard", false); if (temp == "not found") { temp = "T"; }
+ hard = m->isTrue(temp);
//**********************************************************************************************************************
int MGClusterCommand::execute(){
try {
//**********************************************************************************************************************
int MGClusterCommand::execute(){
try {
//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
//must remember to delete those objects here since readBlast does not
read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
read->read(nameMap);
//read blastfile - creates sparsematrices for the distances and overlaps as well as a listvector
//must remember to delete those objects here since readBlast does not
read = new ReadBlast(blastfile, cutoff, penalty, length, minWanted, hclusterWanted);
read->read(nameMap);
-
- list = new ListVector(nameMap->getListVector());
- RAbundVector* rabund = new RAbundVector(list->getRAbundVector());
+
+ list = new ListVector(nameMap->getListVector());
+ RAbundVector* rabund = NULL;
+
+ if(countfile != "") {
+ //map<string, int> nameMapCounts = m->readNames(namefile);
+ ct = new CountTable();
+ ct->readTable(countfile);
+ rabund = new RAbundVector();
+ createRabund(ct, list, rabund);
+ }else {
+ rabund = new RAbundVector(list->getRAbundVector());
+ }
+
+
+ //list = new ListVector(nameMap->getListVector());
+ //rabund = new RAbundVector(list->getRAbundVector());
- //open output files
- m->openOutputFile(fileroot+ tag + ".list", listFile);
- m->openOutputFile(fileroot+ tag + ".rabund", rabundFile);
- m->openOutputFile(fileroot+ tag + ".sabund", sabundFile);
+ string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
+ string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
+ string listFileName = fileroot+ tag + ".";
+ if (countfile != "") { listFileName += "unique_"; }
+ listFileName += getOutputFileNameTag("list");
+
+ if (countfile == "") {
+ m->openOutputFile(sabundFileName, sabundFile);
+ m->openOutputFile(rabundFileName, rabundFile);
+ }
+ m->openOutputFile(listFileName, listFile);
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
if (m->control_pressed) {
delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
- listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
- remove(distFile.c_str());
- remove(overlapFile.c_str());
+ listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
+ m->mothurRemove(distFile);
+ m->mothurRemove(overlapFile);
- m->mothurOut(fileroot+ tag + ".list"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".list"); outputTypes["list"].push_back(fileroot+ tag + ".list");
- m->mothurOut(fileroot+ tag + ".rabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".rabund"); outputTypes["rabund"].push_back(fileroot+ tag + ".rabund");
- m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
+ m->mothurOut(listFileName); m->mothurOutEndLine(); outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
+ if (countfile == "") {
+ m->mothurOut(rabundFileName); m->mothurOutEndLine(); outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+ m->mothurOut(sabundFileName); m->mothurOutEndLine(); outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+ }
+ if (saveCutoff != cutoff) {
+ if (hard) { saveCutoff = m->ceilDist(saveCutoff, precision); }
+ else { saveCutoff = m->roundDist(saveCutoff, precision); }
+
+ m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine();
+ }
+
- mergedList->getRAbundVector().print(rabundFile);
-
- SAbundVector sabund = mergedList->getSAbundVector();
+ SAbundVector sabund = mergedList->getSAbundVector();
+
+ if (countfile == "") {
+ mergedList->getRAbundVector().print(rabundFile);
+ sabund.print(sabundFile);
+ }
distFile = sortedDistFile;
//sort overlap file
string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
distFile = sortedDistFile;
//sort overlap file
string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
+ //for ( int i; i < ct.getNumGroups(); i++ ) { rav.push_back( ct.getNumSeqs(names[i]) ); }
+ //return rav;
+
+ for(int i = 0; i < list->getNumBins(); i++) {
+ vector<string> binNames;
+ string bin = list->get(i);
+ m->splitAtComma(bin, binNames);
+ int total = 0;
+ for (int j = 0; j < binNames.size(); j++) {
+ total += ct->getNumSeqs(binNames[j]);
+ }
+ rabund->push_back(total);
+ }
+
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MGClusterCommand", "createRabund");
+ exit(1);
+ }
+
+}