]> git.donarmstrong.com Git - mothur.git/blobdiff - getsharedotucommand.cpp
fixed bug in pre.cluster with output file name change and other bugs while testing...
[mothur.git] / getsharedotucommand.cpp
index 99687701436dc5ffac7e71a444d97b42f4b6815a..5ae8da588ba4a6d36b6658527d8cd12e208c50c4 100644 (file)
@@ -8,19 +8,20 @@
  */
 
 #include "getsharedotucommand.h"
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
 vector<string> GetSharedOTUCommand::setParameters(){   
        try {
-               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
-               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(pgroup);
-               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(plist);
-               CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "",false,false); parameters.push_back(poutput);
-               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
-               CommandParameter punique("unique", "String", "", "", "", "", "",false,false); parameters.push_back(punique);
-               CommandParameter pshared("shared", "String", "", "", "", "", "",false,false); parameters.push_back(pshared);
-               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
-               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none","fasta",false,false); parameters.push_back(pfasta);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","sharedseq",false,true,true); parameters.push_back(plist);
+               CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "","",false,false); parameters.push_back(poutput);
+               CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+               CommandParameter punique("unique", "String", "", "", "", "", "","",false,false,true); parameters.push_back(punique);
+               CommandParameter pshared("shared", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pshared);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
 
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -45,8 +46,8 @@ string GetSharedOTUCommand::getHelpString(){
                helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
                helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
                helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
-               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
-               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
                helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
                helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
                helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
@@ -58,6 +59,23 @@ string GetSharedOTUCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+string GetSharedOTUCommand::getOutputPattern(string type) { // Returns the comma-separated output filename pattern for the given output type name; returns "" if the type is not recognized.
+    try {
+        string pattern = ""; // stays empty when none of the branches below match
+
+        if (type == "fasta")            {   pattern =  "[filename],[distance],[group],shared.fasta";   } // bracketed tokens are the same keys process() stores in its variables map before calling getOutputFileName
+        else if (type == "accnos")      {   pattern =  "[filename],[distance],[group],accnos";         }
+        else if (type == "sharedseqs")  {   pattern =  "[filename],[distance],[group],shared.seqs";    } // NOTE(review): setParameters passes "sharedseq" (no trailing 's') for the group and list parameters - confirm which spelling is intended
+        else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  } // unknown type: print the error and set the control_pressed flag
+        
+        return pattern;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "GetSharedOTUCommand", "getOutputPattern"); // hand the exception to m->errorOut along with this class and method name
+        exit(1); // then terminate the process with status 1
+    }
+}
+//**********************************************************************************************************************
 GetSharedOTUCommand::GetSharedOTUCommand(){    
        try {
                abort = true; calledHelp = true;
@@ -148,7 +166,7 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                                        m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
                                        abort = true;
                                }
-                       }else {  format = "list";       }
+                       }else {  format = "list";       m->setListFile(listfile); }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
@@ -159,7 +177,7 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                                        m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); 
                                        abort = true;
                                }
-                       }
+                       }else { m->setGroupFile(groupfile); }
                                                
                        if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; }
                        
@@ -181,7 +199,7 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        else { 
                                userGroups = "unique." + groups;
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                                
                        }
                        
@@ -190,14 +208,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        else { 
                                userGroups = groups;
                                m->splitAtDash(groups, Groups);
-                               m->Groups = Groups;
+                               m->setGroups(Groups);
                                unique = false;
                        }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }        
-                               
+                       else { m->setFastaFile(fastafile); }
                }
 
        }
@@ -220,12 +238,18 @@ int GetSharedOTUCommand::execute(){
                if (m->control_pressed) { delete groupMap; return 0; }
                
                if (Groups.size() == 0) {
-                       Groups = groupMap->namesOfGroups;
+                       Groups = groupMap->getNamesOfGroups();
                        
                        //make string for outputfile name
                        userGroups = "unique.";
                        for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
                        userGroups = userGroups.substr(0, userGroups.length()-1);
+               }else{
+                       //sanity check for group names
+                       SharedUtil util;
+                       vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
+                       util.setGroups(Groups, namesOfGroups);
+                       groupMap->setNamesOfGroups(namesOfGroups);
                }
        
                //put groups in map to find easier
@@ -261,7 +285,7 @@ int GetSharedOTUCommand::execute(){
                        
                        if (m->control_pressed) { 
                                if (lastlist != NULL) {         delete lastlist;        }
-                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }  outputTypes.clear();
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }  outputTypes.clear();
                                delete groupMap; return 0;
                        }
                        
@@ -320,11 +344,11 @@ int GetSharedOTUCommand::execute(){
                
 
                //reset groups parameter
-               m->Groups.clear();  
+               m->clearGroups();  
                
                if (lastlist != NULL) {         delete lastlist;        }
                
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); }  delete groupMap; return 0; } 
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
                
                //set fasta file as new current fastafile
                string current = "";
@@ -364,11 +388,13 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                string outputFileNames;
                
                if (outputDir == "") { outputDir += m->hasPath(listfile); }
-               if (output != "accnos") {
-                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".shared.seqs";
-               }else {
-                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".accnos";
-               }
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
+        variables["[distance]"] = shared->getLabel();
+        variables["[group]"] = userGroups;
+               if (output != "accnos") { outputFileNames = getOutputFileName("sharedseqs", variables); }
+               else { outputFileNames = getOutputFileName("accnos", variables); }
+        
                m->openOutputFile(outputFileNames, outNames);
                
                bool wroteSomething = false;
@@ -376,7 +402,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                
                //go through each bin, find out if shared
                for (int i = 0; i < shared->getNumBins(); i++) {
-                       if (m->control_pressed) { outNames.close(); remove(outputFileNames.c_str()); return 0; }
+                       if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
                        
                        bool uniqueOTU = true;
                        
@@ -387,10 +413,11 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                        
                        vector<string> namesOfSeqsInThisBin;
                        
-                       string names = shared->get(i);  
-                       while ((names.find_first_of(',') != -1)) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
+                       string names = shared->get(i); 
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+                       for(int j = 0; j < binNames.size(); j++) {
+                               string name = binNames[j];
                                
                                //find group
                                string seqGroup = groupMap->getGroup(name);
@@ -406,20 +433,6 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                else {  atLeastOne[seqGroup]++;  }
                        }
                        
-                       //get last name
-                       string seqGroup = groupMap->getGroup(names);
-                       if (output != "accnos") {
-                               namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
-                       }else {  namesOfSeqsInThisBin.push_back(names); }
-                       
-                       if (seqGroup == "not found") { m->mothurOut(names + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1);  }
-                       
-                       //is this seq in one of hte groups we care about
-                       it = groupFinder.find(seqGroup);
-                       if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have a sequence from a group you don't want
-                       else {  atLeastOne[seqGroup]++;  }
-                       
-                       
                        //make sure you have at least one seq from each group you want
                        bool sharedByAll = true;
                        map<string, int>::iterator it2;
@@ -454,7 +467,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                outNames.close();
                
                if (!wroteSomething) {
-                       remove(outputFileNames.c_str());
+                       m->mothurRemove(outputFileNames);
                        string outputString = "\t" + toString(num) + " - No otus shared by groups";
                        
                        string groupString = "";
@@ -474,7 +487,8 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                //if fasta file provided output new fasta file
                if ((fastafile != "") && wroteSomething) {
                        if (outputDir == "") { outputDir += m->hasPath(fastafile); }
-                       string outputFileFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + shared->getLabel() + userGroups + ".shared.fasta";
+            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
+                       string outputFileFasta = getOutputFileName("fasta", variables);
                        ofstream outFasta;
                        m->openOutputFile(outputFileFasta, outFasta);
                        outputNames.push_back(outputFileFasta); outputTypes["fasta"].push_back(outputFileFasta);