]> git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / mgclustercommand.cpp
index 3ed9bf2131a45ac7a4b0ac44d959b16bfe28cd36..f0886c8c133c12a84c627304cafbe8173ef45519 100644 (file)
@@ -9,70 +9,83 @@
 
 #include "mgclustercommand.h"
 
-
 //**********************************************************************************************************************
-vector<string> MGClusterCommand::getValidParameters(){ 
+vector<string> MGClusterCommand::setParameters(){      
        try {
-               string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
+               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+               CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
+               CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
+               CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
+               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
+               CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
+               CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
+               CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
+               CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "getValidParameters");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-MGClusterCommand::MGClusterCommand(){  
-       try {
-               abort = true;
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["list"] = tempOutNames;
-               outputTypes["rabund"] = tempOutNames;
-               outputTypes["sabund"] = tempOutNames;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
+               m->errorOut(e, "MGClusterCommand", "setParameters");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-vector<string> MGClusterCommand::getRequiredParameters(){      
+string MGClusterCommand::getHelpString(){      
        try {
-               string Array[] =  {"blast"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
+               string helpString = "";
+               helpString += "The mgcluster command parameter options are blast, name, cutoff, precision, hard,  method, merge, min, length, penalty and hcluster. The blast parameter is required.\n";
+               helpString += "The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n";
+               helpString += "This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n";
+               helpString += "The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n";
+               helpString += "The precision parameter's default value is 100. \n";
+               helpString += "The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then average is assumed.\n";       
+               helpString += "The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n";
+               helpString += "The length parameter is used to specify the minimum overlap required.  The default is 5.\n";
+               helpString += "The penalty parameter is used to adjust the error rate.  The default is 0.10.\n";
+               helpString += "The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n";
+               helpString += "The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n";
+               helpString += "The mgcluster command should be in the following format: \n";
+               helpString += "mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n";
+               helpString += "Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "getRequiredParameters");
+               m->errorOut(e, "MGClusterCommand", "getHelpString");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-vector<string> MGClusterCommand::getRequiredFiles(){   
+MGClusterCommand::MGClusterCommand(){  
        try {
-               vector<string> myArray;
-               return myArray;
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["list"] = tempOutNames;
+               outputTypes["rabund"] = tempOutNames;
+               outputTypes["sabund"] = tempOutNames;
        }
        catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "getRequiredFiles");
+               m->errorOut(e, "MGClusterCommand", "MGClusterCommand");
                exit(1);
        }
 }
 //**********************************************************************************************************************
 MGClusterCommand::MGClusterCommand(string option) {
        try {
-               globaldata = GlobalData::getInstance();
-               abort = false;
+               abort = false; calledHelp = false;   
                
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       //valid paramters for this command
-                       string Array[] =  {"blast", "method", "name", "hard", "cutoff", "precision", "length", "min", "penalty", "hcluster","merge","outputdir","inputdir"};
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string, string> parameters = parser.getParameters();
@@ -116,7 +129,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        
                        //check for required parameters
                        blastfile = validParameter.validFile(parameters, "blast", true);
-                       if (blastfile == "not open") { abort = true; }  
+                       if (blastfile == "not open") { blastfile = ""; abort = true; }  
                        else if (blastfile == "not found") { blastfile = ""; }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -128,6 +141,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        namefile = validParameter.validFile(parameters, "name", true);
                        if (namefile == "not open") { abort = true; }   
                        else if (namefile == "not found") { namefile = ""; }
+                       else { m->setNameFile(namefile); }
                        
                        if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
@@ -142,7 +156,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        cutoff += (5 / (precision * 10.0));
                        
                        method = validParameter.validFile(parameters, "method", false);
-                       if (method == "not found") { method = "furthest"; }
+                       if (method == "not found") { method = "average"; }
                        
                        if ((method == "furthest") || (method == "nearest") || (method == "average")) { }
                        else { m->mothurOut("Not a valid clustering method.  Valid clustering algorithms are furthest, nearest or average."); m->mothurOutEndLine(); abort = true; }
@@ -162,7 +176,7 @@ MGClusterCommand::MGClusterCommand(string option) {
                        temp = validParameter.validFile(parameters, "hcluster", false);                 if (temp == "not found") { temp = "false"; }
                        hclusterWanted = m->isTrue(temp); 
                        
-                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "F"; }
+                       temp = validParameter.validFile(parameters, "hard", false);                     if (temp == "not found") { temp = "T"; }
                        hard = m->isTrue(temp);
                }
 
@@ -173,36 +187,10 @@ MGClusterCommand::MGClusterCommand(string option) {
        }
 }
 //**********************************************************************************************************************
-
-void MGClusterCommand::help(){
-       try {
-               m->mothurOut("The mgcluster command parameter options are blast, name, cutoff, precision, method, merge, min, length, penalty and hcluster. The blast parameter is required.\n");
-               m->mothurOut("The mgcluster command reads a blast and name file and clusters the sequences into OPF units similiar to the OTUs.\n");
-               m->mothurOut("This command outputs a .list, .rabund and .sabund file that can be used with mothur other commands to estimate richness.\n");
-               m->mothurOut("The cutoff parameter is used to specify the maximum distance you would like to cluster to. The default is 0.70.\n");
-               m->mothurOut("The precision parameter's default value is 100. \n");
-               m->mothurOut("The acceptable mgcluster methods are furthest, nearest and average.  If no method is provided then furthest is assumed.\n\n");    
-               m->mothurOut("The min parameter allows you to specify is you want the minimum or maximum blast score ratio used in calculating the distance. The default is true, meaning you want the minimum.\n");
-               m->mothurOut("The length parameter is used to specify the minimum overlap required.  The default is 5.\n");
-               m->mothurOut("The penalty parameter is used to adjust the error rate.  The default is 0.10.\n");
-               m->mothurOut("The merge parameter allows you to shut off merging based on overlaps and just cluster.  By default merge is true, meaning you want to merge.\n");
-               m->mothurOut("The hcluster parameter allows you to use the hcluster algorithm when clustering.  This may be neccessary if your file is too large to fit into RAM. The default is false.\n");
-               m->mothurOut("The mgcluster command should be in the following format: \n");
-               m->mothurOut("mgcluster(blast=yourBlastfile, name=yourNameFile, cutoff=yourCutOff).\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. balst), '=' and parameters (i.e.yourBlastfile).\n\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "help");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
-MGClusterCommand::~MGClusterCommand(){}
-//**********************************************************************************************************************
 int MGClusterCommand::execute(){
        try {
                
-               if (abort == true) {    return 0;       }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                //read names file
                if (namefile != "") {
@@ -242,11 +230,13 @@ int MGClusterCommand::execute(){
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
                
+               double saveCutoff = cutoff;
+               
                if (!hclusterWanted) {
                        //get distmatrix and overlap
                        SparseMatrix* distMatrix = read->getDistMatrix();
@@ -263,7 +253,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -275,7 +265,7 @@ int MGClusterCommand::execute(){
                                
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -298,7 +288,7 @@ int MGClusterCommand::execute(){
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -329,7 +319,7 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -359,7 +349,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -376,7 +366,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
+                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -385,11 +375,16 @@ int MGClusterCommand::execute(){
                
                                seqs = hcluster->getSeqs();
                                
+                               //to account for cutoff change in average neighbor
+                               if (seqs.size() != 0) {
+                                       if (seqs[0].dist > cutoff) { break; }
+                               }
+                               
                                if (m->control_pressed) { 
                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                       remove(distFile.c_str());
-                                       remove(overlapFile.c_str());
+                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                       m->mothurRemove(distFile);
+                                       m->mothurRemove(overlapFile);
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -398,13 +393,13 @@ int MGClusterCommand::execute(){
                                        
                                        if (seqs[i].seq1 != seqs[i].seq2) {
                
-                                               hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
+                                               cutoff = hcluster->update(seqs[i].seq1, seqs[i].seq2, seqs[i].dist);
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -426,9 +421,9 @@ int MGClusterCommand::execute(){
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
-                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                                       remove(distFile.c_str());
-                                                                       remove(overlapFile.c_str());
+                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                                       m->mothurRemove(distFile);
+                                                                       m->mothurRemove(overlapFile);
                                                                        outputTypes.clear();
                                                                        return 0; 
                                                                }
@@ -462,9 +457,9 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                                                       remove(distFile.c_str());
-                                                       remove(overlapFile.c_str());
+                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       m->mothurRemove(distFile);
+                                                       m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -479,8 +474,8 @@ int MGClusterCommand::execute(){
                        }
                        
                        delete hcluster;
-                       remove(distFile.c_str());
-                       remove(overlapFile.c_str());
+                       m->mothurRemove(distFile);
+                       m->mothurRemove(overlapFile);
                }
                
                delete list; 
@@ -489,14 +484,9 @@ int MGClusterCommand::execute(){
                sabundFile.close();
                rabundFile.close();
        
-               globaldata->setListFile(fileroot+ tag + ".list");
-               globaldata->setFormat("list");
-               
                if (m->control_pressed) { 
                        delete nameMap; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); remove((fileroot+ tag + ".list").c_str()); remove((fileroot+ tag + ".rabund").c_str()); remove((fileroot+ tag + ".sabund").c_str());
-                       globaldata->setListFile("");
-                       globaldata->setFormat("");
+                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
                        outputTypes.clear();
                        return 0; 
                }
@@ -508,6 +498,33 @@ int MGClusterCommand::execute(){
                m->mothurOut(fileroot+ tag + ".sabund"); m->mothurOutEndLine(); outputNames.push_back(fileroot+ tag + ".sabund"); outputTypes["sabund"].push_back(fileroot+ tag + ".sabund");
                m->mothurOutEndLine();
                
+               if (saveCutoff != cutoff) { 
+                       if (hard)       {  saveCutoff = m->ceilDist(saveCutoff, precision);     }
+                       else            {       saveCutoff = m->roundDist(saveCutoff, precision);  }
+                       
+                       m->mothurOut("changed cutoff to " + toString(cutoff)); m->mothurOutEndLine(); 
+               }
+               
+               //set list file as new current listfile
+               string current = "";
+               itTypes = outputTypes.find("list");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
+               }
+               
+               //set rabund file as new current rabundfile
+               itTypes = outputTypes.find("rabund");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setRabundFile(current); }
+               }
+               
+               //set sabund file as new current sabundfile
+               itTypes = outputTypes.find("sabund");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSabundFile(current); }
+               }
+               
+               
                m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to cluster."); m->mothurOutEndLine();
                        
                return 0;
@@ -641,12 +658,12 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve
        try {
                //sort distFile
                string sortedDistFile = m->sortFile(unsortedDist, outputDir);
-               remove(unsortedDist.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedDist);  //delete unsorted file
                distFile = sortedDistFile;
                
                //sort overlap file
                string sortedOverlapFile = m->sortFile(unsortedOverlap, outputDir);
-               remove(unsortedOverlap.c_str());  //delete unsorted file
+               m->mothurRemove(unsortedOverlap);  //delete unsorted file
                overlapFile = sortedOverlapFile;
        }
        catch(exception& e) {