git.donarmstrong.com Git - mothur.git/blobdiff - getsharedotucommand.cpp
added sparseDistanceMatrix class. Modified cluster commands to use the new sparse...
[mothur.git] / getsharedotucommand.cpp
index f8c245f95f8e159b61db3acda6cfa42dee9417d3..1b69a25d4af7e638dbf48a64baec2390f37dcfec 100644 (file)
@@ -8,54 +8,91 @@
  */
 
 #include "getsharedotucommand.h"
-
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
-vector<string> GetSharedOTUCommand::getValidParameters(){      
+vector<string> GetSharedOTUCommand::setParameters(){   
        try {
-               string Array[] =  {"label","unique","shared","fasta","list","group","output","outputdir","inputdir"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
+               CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(pgroup);
+               CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,true); parameters.push_back(plist);
+               CommandParameter poutput("output", "Multiple", "accnos-default", "default", "", "", "",false,false); parameters.push_back(poutput);
+               CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
+               CommandParameter punique("unique", "String", "", "", "", "", "",false,false); parameters.push_back(punique);
+               CommandParameter pshared("shared", "String", "", "", "", "", "",false,false); parameters.push_back(pshared);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "GetSharedOTUCommand", "getValidParameters");
+               m->errorOut(e, "GetSharedOTUCommand", "setParameters");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-GetSharedOTUCommand::GetSharedOTUCommand(){    
+string GetSharedOTUCommand::getHelpString(){   
        try {
-               //initialize outputTypes
-               vector<string> tempOutNames;
-               outputTypes["fasta"] = tempOutNames;
-               outputTypes["accnos"] = tempOutNames;
-               outputTypes["sharedseqs"] = tempOutNames;
+               string helpString = "";
+               helpString += "The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta.  The list and group parameters are required, unless you have valid current files.\n";
+               helpString += "The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n";
+               helpString += "The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n";
+               helpString += "If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n";
+               helpString += "If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n";
+               helpString += "If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n";
+               helpString += "The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n";
+               helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
+               helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
+               helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
+               helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
+               helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "GetSharedOTUCommand", "GetSharedOTUCommand");
+               m->errorOut(e, "GetSharedOTUCommand", "getHelpString");
                exit(1);
        }
 }
 //**********************************************************************************************************************
-vector<string> GetSharedOTUCommand::getRequiredParameters(){   
+string GetSharedOTUCommand::getOutputFileNameTag(string type, string inputName=""){    
        try {
-               string Array[] =  {"list","group"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
+        string outputFileName = "";
+               map<string, vector<string> >::iterator it;
+        
+        //is this a type this command creates
+        it = outputTypes.find(type);
+        if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+        else {
+            if (type == "fasta")            {   outputFileName =  "shared.fasta";   }
+            else if (type == "accnos")      {   outputFileName =  "accnos";         }
+            else if (type == "sharedseqs")  {   outputFileName =  "shared.seqs";    }
+            else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
+        }
+        return outputFileName;
        }
        catch(exception& e) {
-               m->errorOut(e, "GetSharedOTUCommand", "getRequiredParameters");
+               m->errorOut(e, "GetSharedOTUCommand", "getOutputFileNameTag");
                exit(1);
        }
 }
+
 //**********************************************************************************************************************
-vector<string> GetSharedOTUCommand::getRequiredFiles(){        
+GetSharedOTUCommand::GetSharedOTUCommand(){    
        try {
-               vector<string> myArray;
-               return myArray;
+               abort = true; calledHelp = true;
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+               outputTypes["accnos"] = tempOutNames;
+               outputTypes["sharedseqs"] = tempOutNames;
        }
        catch(exception& e) {
-               m->errorOut(e, "GetSharedOTUCommand", "getRequiredFiles");
+               m->errorOut(e, "GetSharedOTUCommand", "GetSharedOTUCommand");
                exit(1);
        }
 }
@@ -63,19 +100,16 @@ vector<string> GetSharedOTUCommand::getRequiredFiles(){
 GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
        try {
        
-               globaldata = GlobalData::getInstance();
-               abort = false;
+               abort = false; calledHelp = false;   
                unique = true;
                allLines = 1;
-               labels.clear();
                
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       //valid paramters for this command
-                       string Array[] =  {"label","unique","shared","fasta","list","group","output","outputdir","inputdir"};
-                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -131,12 +165,25 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        //check for required parameters
                        listfile = validParameter.validFile(parameters, "list", true);
                        if (listfile == "not open") { abort = true; }
-                       else if (listfile == "not found") { listfile = ""; }    
-                       else {  globaldata->setListFile(listfile);  globaldata->setFormat("list");      }
+                       else if (listfile == "not found") { 
+                               listfile = m->getListFile(); 
+                               if (listfile != "") { format = "list"; m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("No valid current list file. You must provide a list file."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               }
+                       }else {  format = "list";       m->setListFile(listfile); }
                        
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
-                       else if (groupfile == "not found") { groupfile = ""; }
+                       else if (groupfile == "not found") { 
+                               groupfile = m->getGroupFile(); 
+                               if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("No valid current group file. You must provide a group file."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               }
+                       }else { m->setGroupFile(groupfile); }
                                                
                        if ((listfile == "") || (groupfile == "")) { m->mothurOut("The list and group parameters are required."); m->mothurOutEndLine(); abort = true; }
                        
@@ -151,13 +198,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        
                        output = validParameter.validFile(parameters, "output", false);                 
                        if (output == "not found") { output = ""; }
+                       else if (output == "default") { output = ""; }
                        
                        groups = validParameter.validFile(parameters, "unique", false);                 
                        if (groups == "not found") { groups = ""; }
                        else { 
                                userGroups = "unique." + groups;
                                m->splitAtDash(groups, Groups);
-                               globaldata->Groups = Groups;
+                               m->setGroups(Groups);
                                
                        }
                        
@@ -166,14 +214,14 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
                        else { 
                                userGroups = groups;
                                m->splitAtDash(groups, Groups);
-                               globaldata->Groups = Groups;
+                               m->setGroups(Groups);
                                unique = false;
                        }
                        
                        fastafile = validParameter.validFile(parameters, "fasta", true);
                        if (fastafile == "not open") { abort = true; }
                        else if (fastafile == "not found") {  fastafile = "";  }        
-                               
+                       else { m->setFastaFile(fastafile); }
                }
 
        }
@@ -184,40 +232,10 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option)  {
 }
 //**********************************************************************************************************************
 
-void GetSharedOTUCommand::help(){
-       try {
-               m->mothurOut("The get.sharedseqs command parameters are list, group, label, unique, shared, output and fasta.  The list and group parameters are required.\n");
-               m->mothurOut("The label parameter allows you to select what distance levels you would like output files for, and are separated by dashes.\n");
-               m->mothurOut("The unique and shared parameters allow you to select groups you would like to know the shared info for, and are separated by dashes.\n");
-               m->mothurOut("If you enter your groups under the unique parameter mothur will return the otus that contain ONLY sequences from those groups.\n");
-               m->mothurOut("If you enter your groups under the shared parameter mothur will return the otus that contain sequences from those groups and may also contain sequences from other groups.\n");
-               m->mothurOut("If you do not enter any groups then the get.sharedseqs command will return sequences that are unique to all groups in your group file.\n");
-               m->mothurOut("The fasta parameter allows you to input a fasta file and outputs a fasta file for each distance level containing only the sequences that are in OTUs shared by the groups specified.\n");
-               m->mothurOut("The output parameter allows you to output the list of names without the group and bin number added. \n");
-               m->mothurOut("With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n");
-               m->mothurOut("The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n");
-               m->mothurOut("The get.sharedseqs command should be in the following format: get.sabund(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n");
-               m->mothurOut("Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n");
-               m->mothurOut("The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n");
-               m->mothurOut("The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n\n");
-       }
-       catch(exception& e) {
-               m->errorOut(e, "GetSharedOTUCommand", "help");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-GetSharedOTUCommand::~GetSharedOTUCommand(){}
-
-//**********************************************************************************************************************
-
 int GetSharedOTUCommand::execute(){
        try {
                
-               if (abort == true) { return 0; }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
                groupMap = new GroupMap(groupfile);
                int error = groupMap->readMap();
@@ -225,15 +243,19 @@ int GetSharedOTUCommand::execute(){
                
                if (m->control_pressed) { delete groupMap; return 0; }
                
-               globaldata->gGroupmap = groupMap;
-               
                if (Groups.size() == 0) {
-                       Groups = groupMap->namesOfGroups;
+                       Groups = groupMap->getNamesOfGroups();
                        
                        //make string for outputfile name
                        userGroups = "unique.";
                        for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
                        userGroups = userGroups.substr(0, userGroups.length()-1);
+               }else{
+                       //sanity check for group names
+                       SharedUtil util;
+                       vector<string> namesOfGroups = groupMap->getNamesOfGroups(); 
+                       util.setGroups(Groups, namesOfGroups);
+                       groupMap->setNamesOfGroups(namesOfGroups);
                }
        
                //put groups in map to find easier
@@ -269,7 +291,7 @@ int GetSharedOTUCommand::execute(){
                        
                        if (m->control_pressed) { 
                                if (lastlist != NULL) {         delete lastlist;        }
-                               for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }  outputTypes.clear();
+                               for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); }  outputTypes.clear();
                                delete groupMap; return 0;
                        }
                        
@@ -328,11 +350,25 @@ int GetSharedOTUCommand::execute(){
                
 
                //reset groups parameter
-               globaldata->Groups.clear();  
+               m->clearGroups();  
                
                if (lastlist != NULL) {         delete lastlist;        }
                
-               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   remove(outputNames[i].c_str()); }  delete groupMap; return 0; } 
+               if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  delete groupMap; return 0; } 
+               
+               //set fasta file as new current fastafile
+               string current = "";
+               itTypes = outputTypes.find("fasta");
+               if (itTypes != outputTypes.end()) {
+                       if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+               }
+               
+               if (output == "accnos") {
+                       itTypes = outputTypes.find("accnos");
+                       if (itTypes != outputTypes.end()) {
+                               if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+                       }
+               }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -359,9 +395,9 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                
                if (outputDir == "") { outputDir += m->hasPath(listfile); }
                if (output != "accnos") {
-                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".shared.seqs";
+                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("sharedseqs");
                }else {
-                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + ".accnos";
+                       outputFileNames = outputDir + m->getRootName(m->getSimpleName(listfile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("accnos");
                }
                m->openOutputFile(outputFileNames, outNames);
                
@@ -370,7 +406,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                
                //go through each bin, find out if shared
                for (int i = 0; i < shared->getNumBins(); i++) {
-                       if (m->control_pressed) { outNames.close(); remove(outputFileNames.c_str()); return 0; }
+                       if (m->control_pressed) { outNames.close(); m->mothurRemove(outputFileNames); return 0; }
                        
                        bool uniqueOTU = true;
                        
@@ -381,10 +417,11 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                        
                        vector<string> namesOfSeqsInThisBin;
                        
-                       string names = shared->get(i);  
-                       while ((names.find_first_of(',') != -1)) { 
-                               string name = names.substr(0,names.find_first_of(','));
-                               names = names.substr(names.find_first_of(',')+1, names.length());
+                       string names = shared->get(i); 
+            vector<string> binNames;
+            m->splitAtComma(names, binNames);
+                       for(int j = 0; j < binNames.size(); j++) {
+                               string name = binNames[j];
                                
                                //find group
                                string seqGroup = groupMap->getGroup(name);
@@ -400,20 +437,6 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                                else {  atLeastOne[seqGroup]++;  }
                        }
                        
-                       //get last name
-                       string seqGroup = groupMap->getGroup(names);
-                       if (output != "accnos") {
-                               namesOfSeqsInThisBin.push_back((names + "|" + seqGroup + "|" + toString(i+1)));
-                       }else {  namesOfSeqsInThisBin.push_back(names); }
-                       
-                       if (seqGroup == "not found") { m->mothurOut(names + " is not in your groupfile. Please correct."); m->mothurOutEndLine(); exit(1);  }
-                       
-                       //is this seq in one of hte groups we care about
-                       it = groupFinder.find(seqGroup);
-                       if (it == groupFinder.end()) {  uniqueOTU = false;  } //you have a sequence from a group you don't want
-                       else {  atLeastOne[seqGroup]++;  }
-                       
-                       
                        //make sure you have at least one seq from each group you want
                        bool sharedByAll = true;
                        map<string, int>::iterator it2;
@@ -448,7 +471,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                outNames.close();
                
                if (!wroteSomething) {
-                       remove(outputFileNames.c_str());
+                       m->mothurRemove(outputFileNames);
                        string outputString = "\t" + toString(num) + " - No otus shared by groups";
                        
                        string groupString = "";
@@ -468,7 +491,7 @@ int GetSharedOTUCommand::process(ListVector* shared) {
                //if fasta file provided output new fasta file
                if ((fastafile != "") && wroteSomething) {
                        if (outputDir == "") { outputDir += m->hasPath(fastafile); }
-                       string outputFileFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + shared->getLabel() + userGroups + ".shared.fasta";
+                       string outputFileFasta = outputDir + m->getRootName(m->getSimpleName(fastafile)) + shared->getLabel() + userGroups + "." + getOutputFileNameTag("fasta");
                        ofstream outFasta;
                        m->openOutputFile(outputFileFasta, outFasta);
                        outputNames.push_back(outputFileFasta); outputTypes["fasta"].push_back(outputFileFasta);