]> git.donarmstrong.com Git - mothur.git/blobdiff - clustercommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / clustercommand.cpp
index 06e627a615dacf69fe80464db0123582bef510ed..94129654e1d2b11275e82d3a5da0382b1448efa9 100644 (file)
 //**********************************************************************************************************************
 vector<string> ClusterCommand::setParameters(){        
        try {
-               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
-               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName",false,false); parameters.push_back(pname);
-               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pcount);
-        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName",false,false); parameters.push_back(pcolumn);             
-               CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "",false,false); parameters.push_back(pcutoff);
-               CommandParameter pprecision("precision", "Number", "", "100", "", "", "",false,false); parameters.push_back(pprecision);
-               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "",false,false); parameters.push_back(pmethod);
-               CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pshowabund);
-               CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ptiming);
-               CommandParameter psim("sim", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psim);
-               CommandParameter phard("hard", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(phard);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none","list",false,false,true); parameters.push_back(pphylip);
+               CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "ColumnName","rabund-sabund",false,false,true); parameters.push_back(pname);
+               CommandParameter pcount("count", "InputTypes", "", "", "NameCount", "none", "none","",false,false,true); parameters.push_back(pcount);
+        CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "ColumnName","list",false,false,true); parameters.push_back(pcolumn);         
+               CommandParameter pcutoff("cutoff", "Number", "", "10", "", "", "","",false,false,true); parameters.push_back(pcutoff);
+               CommandParameter pprecision("precision", "Number", "", "100", "", "", "","",false,false); parameters.push_back(pprecision);
+               CommandParameter pmethod("method", "Multiple", "furthest-nearest-average-weighted", "average", "", "", "","",false,false,true); parameters.push_back(pmethod);
+               CommandParameter pshowabund("showabund", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshowabund);
+               CommandParameter ptiming("timing", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptiming);
+               CommandParameter psim("sim", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psim);
+               CommandParameter phard("hard", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(phard);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+        //CommandParameter padjust("adjust", "String", "", "F", "", "", "","",false,false); parameters.push_back(padjust);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -45,7 +46,8 @@ string ClusterCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The cluster command parameter options are phylip, column, name, count, method, cuttoff, hard, precision, sim, showabund and timing. Phylip or column and name are required, unless you have a valid current file.\n";
-               helpString += "The cluster command should be in the following format: \n";
+               //helpString += "The adjust parameter is used to handle missing distances.  If you set a cutoff, adjust=f by default.  If not, adjust=t by default. Adjust=f, means ignore missing distances and adjust cutoff as needed with the average neighbor method.  Adjust=t, will treat missing distances as 1.0. You can also set the value the missing distances should be set to, adjust=0.5 would give missing distances a value of 0.5.\n";
+        helpString += "The cluster command should be in the following format: \n";
                helpString += "cluster(method=yourMethod, cutoff=yourCutoff, precision=yourPrecision) \n";
                helpString += "The acceptable cluster methods are furthest, nearest, average and weighted.  If no method is provided then average is assumed.\n";       
                return helpString;
@@ -56,26 +58,21 @@ string ClusterCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
-string ClusterCommand::getOutputFileNameTag(string type, string inputName=""){ 
-       try {
-        string outputFileName = "";
-               map<string, vector<string> >::iterator it;
+string ClusterCommand::getOutputPattern(string type) { // Returns the output filename pattern string for the given output type ("list", "rabund" or "sabund").
+    try {
+        string pattern = ""; // Stays empty if `type` matches none of the branches below.
         
-        //is this a type this command creates
-        it = outputTypes.find(type);
-        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
-        else {
-            if (type == "list") {  outputFileName =  "list"; }
-            else if (type == "rabund") {  outputFileName =  "rabund"; }
-            else if (type == "sabund") {  outputFileName =  "sabund"; }
-            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
-        }
-        return outputFileName;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "ClusterCommand", "getOutputFileNameTag");
-               exit(1);
-       }
+        if (type == "list") {  pattern = "[filename],[clustertag],list-[filename],[clustertag],[tag2],list"; } // NOTE(review): presumably the '-' separates two alternative patterns, the second adding a [tag2] field - confirm against getOutputFileName.
+        else if (type == "rabund") {  pattern = "[filename],[clustertag],rabund"; } 
+        else if (type == "sabund") {  pattern = "[filename],[clustertag],sabund"; }
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } // Unknown type: log the error and set m->control_pressed; the empty pattern is still returned.
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ClusterCommand", "getOutputPattern"); // Report the exception together with the class and method name.
+        exit(1); // Terminates the process after reporting; nothing is returned on this path.
+    }
 }
 //**********************************************************************************************************************
 ClusterCommand::ClusterCommand(){      
@@ -234,10 +231,18 @@ ClusterCommand::ClusterCommand(string option)  {
                        temp = validParameter.validFile(parameters, "sim", false);                              if (temp == "not found") { temp = "F"; }
                        sim = m->isTrue(temp); 
                        
+            //bool cutoffSet = false;
                        temp = validParameter.validFile(parameters, "cutoff", false);
                        if (temp == "not found") { temp = "10"; }
+            //else { cutoffSet = true; }
                        m->mothurConvert(temp, cutoff); 
-                       cutoff += (5 / (precision * 10.0));  
+                       cutoff += (5 / (precision * 10.0));
+            
+            //temp = validParameter.validFile(parameters, "adjust", false);                            if (temp == "not found") { temp = "F"; }
+            //if (m->isNumeric1(temp))    { m->mothurConvert(temp, adjust);   }
+            //else if (m->isTrue(temp))   { adjust = 1.0;                     }
+            //else                        { adjust = -1.0;                    }
+            adjust=-1.0;
                        
                        method = validParameter.validFile(parameters, "method", false);
                        if (method == "not found") { method = "average"; }
@@ -310,9 +315,9 @@ int ClusterCommand::execute(){
             read->read(nameMap);
                }else if (countfile != "") {
             ct = new CountTable();
-            ct->readTable(countfile);
+            ct->readTable(countfile, false);
             read->read(ct);
-        }
+        }else { read->read(nameMap); }
                
                list = read->getListVector();
                matrix = read->getDMatrix();
@@ -330,20 +335,22 @@ int ClusterCommand::execute(){
                }
                
                //create cluster
-               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method); }
-               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method);     }
-               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method);    }
+               if (method == "furthest")       {       cluster = new CompleteLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "nearest"){   cluster = new SingleLinkage(rabund, list, matrix, cutoff, method, adjust); }
+               else if(method == "average"){   cluster = new AverageLinkage(rabund, list, matrix, cutoff, method, adjust);     }
+               else if(method == "weighted"){  cluster = new WeightedLinkage(rabund, list, matrix, cutoff, method, adjust);    }
                tag = cluster->getTag();
                
                if (outputDir == "") { outputDir += m->hasPath(distfile); }
                fileroot = outputDir + m->getRootName(m->getSimpleName(distfile));
                
-        string sabundFileName = fileroot+ tag + "." + getOutputFileNameTag("sabund");
-        string rabundFileName = fileroot+ tag + "." + getOutputFileNameTag("rabund");
-        string listFileName = fileroot+ tag + ".";
-        if (countfile != "") { listFileName += "unique_"; }
-        listFileName += getOutputFileNameTag("list");
+        map<string, string> variables; 
+        variables["[filename]"] = fileroot;
+        variables["[clustertag]"] = tag;
+        string sabundFileName = getOutputFileName("sabund", variables);
+        string rabundFileName = getOutputFileName("rabund", variables);
+        if (countfile != "") { variables["[tag2]"] = "unique_list"; }
+        string listFileName = getOutputFileName("list", variables);
         
         if (countfile == "") {
             m->openOutputFile(sabundFileName,  sabundFile);