]> git.donarmstrong.com Git - mothur.git/blobdiff - mgclustercommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / mgclustercommand.cpp
index cffb80f9e8f2963d4983c6637d8eef2b33566de4..24c4f27777d21ed88b6f3b41411394502802404e 100644 (file)
 //**********************************************************************************************************************
 vector<string> MGClusterCommand::setParameters(){      
        try {
-               CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pblast);
-               CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
-        CommandParameter pcount("count", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcount);
-        //CommandParameter plarge("large", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(plarge);
-               CommandParameter plength("length", "Number", "", "5", "", "", "",false,false); parameters.push_back(plength);
-               CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "",false,false); parameters.push_back(ppenalty);
-               CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
-               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "",false,false); parameters.push_back(pmethod);
-               CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
-               CommandParameter pmin("min", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmin);
-               CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pmerge);
-               CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(phcluster);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pblast("blast", "InputTypes", "", "", "none", "none", "none","list",false,true,true); parameters.push_back(pblast);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
+               CommandParameter plength("length", "Number", "", "5", "", "", "","",false,false); parameters.push_back(plength);
+               CommandParameter ppenalty("penalty", "Number", "", "0.10", "", "", "","",false,false); parameters.push_back(ppenalty);
+               CommandParameter pcutoff("cutoff", "Number", "", "0.70", "", "", "","",false,false,true); parameters.push_back(pcutoff);
+               CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
+               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average", "average", "", "", "","",false,false); parameters.push_back(pmethod);
+               CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
+               CommandParameter pmin("min", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmin);
+               CommandParameter pmerge("merge", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmerge);
+               CommandParameter phcluster("hcluster", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(phcluster);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -63,28 +62,23 @@ string MGClusterCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string MGClusterCommand::getOutputFileNameTag(string type, string inputName=""){       
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string MGClusterCommand::getOutputPattern(string type) {
+    try {
+        string pattern = "";
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "list") {  outputFileName =  "list"; }
-            else if (type == "rabund") {  outputFileName =  "rabund"; }
-            else if (type == "sabund") {  outputFileName =  "sabund"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "MGClusterCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } 
+        else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
+        else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "MGClusterCommand", "getOutputPattern");
+        exit(1);
+    }
 }
-//**********************************************************************************************************************
+//*******************************************************************************************************************
 MGClusterCommand::MGClusterCommand(){  
        try {
                abort = true; calledHelp = true; 
@@ -148,6 +142,14 @@ MGClusterCommand::MGClusterCommand(string option) {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["name"] = inputDir + it->second;             }
                                }
+                
+                it = parameters.find("count");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["count"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -172,16 +174,13 @@ MGClusterCommand::MGClusterCommand(string option) {
                        else if (countfile == "not found") { countfile = ""; }
             else { m->setCountTableFile(countfile); }
             
-            if (countfile != "" && namefile != "") { m->mothurOut("Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
+            if (countfile != "" && namefile != "") { m->mothurOut("[ERROR]: Cannot have both a name file and count file. Please use one or the other."); m->mothurOutEndLine(); abort = true; }
                        
                        if ((blastfile == "")) { m->mothurOut("When executing a mgcluster command you must provide a blastfile."); m->mothurOutEndLine(); abort = true; }
                        
                        //check for optional parameter and set defaults
                        string temp;
-            temp = validParameter.validFile(parameters, "large", false);                       if (temp == "not found") { temp = "false"; }            
-                       large = m->isTrue(temp); 
-            
-                       temp = validParameter.validFile(parameters, "precision", false);                if (temp == "not found") { temp = "100"; }
+            temp = validParameter.validFile(parameters, "precision", false);           if (temp == "not found") { temp = "100"; }
                        precisionLength = temp.length();
                        m->mothurConvert(temp, precision); 
                        
@@ -249,8 +248,8 @@ int MGClusterCommand::execute(){
             //map<string, int> nameMapCounts = m->readNames(namefile);
             ct = new CountTable();
             ct->readTable(countfile);
-            createRabund(ct, list);
-            rabund = &rav;
+            rabund = new RAbundVector();
+            createRabund(ct, list, rabund);
         }else {
             rabund = new RAbundVector(list->getRAbundVector());
         }
@@ -270,17 +269,23 @@ int MGClusterCommand::execute(){
                else if (method == "nearest")   { tag = "nn";  }
                else                                                    { tag = "an";  }
                
-        string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
-        string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
-        string listFileName = fileroot+ tag + "." + getOutputFileNameTag("list");
+        map<string, string> variables; 
+        variables["[filename]"] = fileroot;
+        variables["[clustertag]"] = tag;
+        string sabundFileName = getOutputFileName("sabund", variables);
+        string rabundFileName = getOutputFileName("rabund", variables);
+        if (countfile != "") { variables["[tag2]"] = "unique_list"; }
+        string listFileName = getOutputFileName("list", variables);
         
-               m->openOutputFile(sabundFileName,       sabundFile);
-               m->openOutputFile(rabundFileName,       rabundFile);
+        if (countfile == "") {
+            m->openOutputFile(sabundFileName,  sabundFile);
+            m->openOutputFile(rabundFileName,  rabundFile);
+        }
                m->openOutputFile(listFileName, listFile);
                
                if (m->control_pressed) { 
                        delete nameMap; delete read; delete list; delete rabund; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                        outputTypes.clear();
                        return 0; 
                }
@@ -289,7 +294,7 @@ int MGClusterCommand::execute(){
                
                if (!hclusterWanted) {
                        //get distmatrix and overlap
-                       SparseMatrix* distMatrix = read->getDistMatrix();
+                       SparseDistanceMatrix* distMatrix = read->getDistMatrix();
                        overlapMatrix = read->getOverlapMatrix(); //already sorted by read 
                        delete read;
                
@@ -303,19 +308,22 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                               listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                outputTypes.clear();
                                return 0; 
                        }
-               
+            
+            
                        //cluster using cluster classes
                        while (distMatrix->getSmallDist() < cutoff && distMatrix->getNNodes() > 0){
                                
+                if (m->debug) {  cout << "numNodes=" << distMatrix->getNNodes() << " smallDist = " << distMatrix->getSmallDist() << endl; }
+                
                                cluster->update(cutoff);
                                
                                if (m->control_pressed) { 
                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                        outputTypes.clear();
                                        return 0; 
                                }
@@ -338,7 +346,7 @@ int MGClusterCommand::execute(){
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                                        outputTypes.clear();
                                                        return 0; 
                                                }
@@ -369,7 +377,7 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete distMatrix; delete list; delete rabund; delete cluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                                        outputTypes.clear();
                                                        return 0; 
                                        }
@@ -399,7 +407,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; 
-                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                               listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -416,7 +424,7 @@ int MGClusterCommand::execute(){
                        
                        if (m->control_pressed) { 
                                delete nameMap;  delete list; delete rabund; delete hcluster;
-                               listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                               listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                outputTypes.clear();
                                return 0; 
                        }
@@ -432,7 +440,7 @@ int MGClusterCommand::execute(){
                                
                                if (m->control_pressed) { 
                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                        m->mothurRemove(distFile);
                                        m->mothurRemove(overlapFile);
                                        outputTypes.clear();
@@ -447,7 +455,7 @@ int MGClusterCommand::execute(){
                                                
                                                if (m->control_pressed) { 
                                                        delete nameMap;  delete list; delete rabund; delete hcluster;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                                        m->mothurRemove(distFile);
                                                        m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
@@ -471,7 +479,7 @@ int MGClusterCommand::execute(){
                                                                
                                                                if (m->control_pressed) { 
                                                                        delete nameMap;  delete list; delete rabund; delete hcluster; delete temp;
-                                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                                                        m->mothurRemove(distFile);
                                                                        m->mothurRemove(overlapFile);
                                                                        outputTypes.clear();
@@ -507,7 +515,7 @@ int MGClusterCommand::execute(){
                                        
                                        if (m->control_pressed) { 
                                                        delete nameMap; delete list; delete rabund; delete hcluster; delete temp;
-                                                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                                                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                                                        m->mothurRemove(distFile);
                                                        m->mothurRemove(overlapFile);
                                                        outputTypes.clear();
@@ -529,14 +537,15 @@ int MGClusterCommand::execute(){
                }
                
                delete list;
-               if (!large) {delete rabund;}
+               delete rabund;
                listFile.close();
-               sabundFile.close();
-               rabundFile.close();
-       
+        if (countfile == "") {
+            sabundFile.close();
+            rabundFile.close();
+        }
                if (m->control_pressed) { 
                        delete nameMap; 
-                       listFile.close(); rabundFile.close(); sabundFile.close(); m->mothurRemove((fileroot+ tag + ".list")); m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund"));
+                       listFile.close(); if (countfile == "") { rabundFile.close(); sabundFile.close();  m->mothurRemove((fileroot+ tag + ".rabund")); m->mothurRemove((fileroot+ tag + ".sabund")); } m->mothurRemove((fileroot+ tag + ".list"));
                        outputTypes.clear();
                        return 0; 
                }
@@ -544,8 +553,10 @@ int MGClusterCommand::execute(){
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                m->mothurOut(listFileName); m->mothurOutEndLine();      outputNames.push_back(listFileName); outputTypes["list"].push_back(listFileName);
-               m->mothurOut(rabundFileName); m->mothurOutEndLine();    outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
-               m->mothurOut(sabundFileName); m->mothurOutEndLine();    outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+               if (countfile == "") {
+            m->mothurOut(rabundFileName); m->mothurOutEndLine();       outputNames.push_back(rabundFileName); outputTypes["rabund"].push_back(rabundFileName);
+            m->mothurOut(sabundFileName); m->mothurOutEndLine();       outputNames.push_back(sabundFileName); outputTypes["sabund"].push_back(sabundFileName);
+        }
                m->mothurOutEndLine();
                
                if (saveCutoff != cutoff) { 
@@ -588,12 +599,14 @@ int MGClusterCommand::execute(){
 void MGClusterCommand::printData(ListVector* mergedList){
        try {
                mergedList->print(listFile);
-               mergedList->getRAbundVector().print(rabundFile);
-               
-               SAbundVector sabund = mergedList->getSAbundVector();
+        SAbundVector sabund = mergedList->getSAbundVector();
+        
+        if (countfile == "") {
+            mergedList->getRAbundVector().print(rabundFile);
+            sabund.print(sabundFile);
+        }
 
                sabund.print(cout);
-               sabund.print(sabundFile);
        }
        catch(exception& e) {
                m->errorOut(e, "MGClusterCommand", "printData");
@@ -724,7 +737,7 @@ void MGClusterCommand::sortHclusterFiles(string unsortedDist, string unsortedOve
 
 //**********************************************************************************************************************
 
-void MGClusterCommand::createRabund(CountTable* ct, ListVector* list){
+void MGClusterCommand::createRabund(CountTable*& ct, ListVector*& list, RAbundVector*& rabund){
     try {
         //vector<string> names = ct.getNamesOfSeqs();
 
@@ -739,7 +752,7 @@ void MGClusterCommand::createRabund(CountTable* ct, ListVector* list){
            for (int j = 0; j < binNames.size(); j++) { 
                total += ct->getNumSeqs(binNames[j]);
            }
-           rav.push_back(total);   
+           rabund->push_back(total);   
        }