]> git.donarmstrong.com Git - mothur.git/commitdiff
added get.otus and remove.otus commands
authorwestcott <westcott>
Fri, 12 Nov 2010 12:43:09 +0000 (12:43 +0000)
committerwestcott <westcott>
Fri, 12 Nov 2010 12:43:09 +0000 (12:43 +0000)
15 files changed:
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
getotuscommand.cpp [new file with mode: 0644]
getotuscommand.h [new file with mode: 0644]
makegroupcommand.cpp
makegroupcommand.h
metastats2.c
mothur
mothurout.cpp
removeotuscommand.cpp [new file with mode: 0644]
removeotuscommand.h [new file with mode: 0644]
unifracunweightedcommand.cpp
unifracunweightedcommand.h
unifracweightedcommand.cpp
unifracweightedcommand.h

index f27f7c36d9ed36eb9faf93a7f84597c1d88939f4..369bae98e626911a3afac441c728b2c469992e85 100644 (file)
@@ -61,6 +61,8 @@
                A751032B128AC93F00ABB831 /* removegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removegroupscommand.cpp; sourceTree = "<group>"; };
                A75103D6128AE34100ABB831 /* getgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getgroupscommand.h; sourceTree = "<group>"; };
                A75103D7128AE34100ABB831 /* getgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getgroupscommand.cpp; sourceTree = "<group>"; };
+               A7510407128AFCC100ABB831 /* getotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getotuscommand.h; sourceTree = "<group>"; };
+               A7510408128AFCC100ABB831 /* getotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getotuscommand.cpp; sourceTree = "<group>"; };
                A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
                A7653CB112789EFD009D6C09 /* subsamplecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = subsamplecommand.h; sourceTree = "<group>"; };
                A7653CB212789EFD009D6C09 /* subsamplecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = subsamplecommand.cpp; sourceTree = "<group>"; };
@@ -73,6 +75,8 @@
                A76AAD03117F322B003D8DA1 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylosummary.cpp; sourceTree = "<group>"; };
                A76C4A1011876BAF0009460B /* setlogfilecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = setlogfilecommand.h; sourceTree = SOURCE_ROOT; };
                A76C4A1111876BAF0009460B /* setlogfilecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = setlogfilecommand.cpp; sourceTree = SOURCE_ROOT; };
+               A76D1451128D6A03005D4DFE /* removeotuscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removeotuscommand.h; sourceTree = "<group>"; };
+               A76D1452128D6A03005D4DFE /* removeotuscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removeotuscommand.cpp; sourceTree = "<group>"; };
                A77D787B126F387700F351BD /* pairwiseseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pairwiseseqscommand.h; sourceTree = "<group>"; };
                A77D787C126F387700F351BD /* pairwiseseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pairwiseseqscommand.cpp; sourceTree = "<group>"; };
                A780E6CB11E7745D00BB5718 /* endiannessmacros.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = endiannessmacros.h; sourceTree = "<group>"; };
                                A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */,
                                A7DA2050113FECD400BF472F /* filterseqscommand.h */,
                                A7DA204F113FECD400BF472F /* filterseqscommand.cpp */,
-                               A75103D6128AE34100ABB831 /* getgroupscommand.h */,
-                               A75103D7128AE34100ABB831 /* getgroupscommand.cpp */,
                                A7DA205B113FECD400BF472F /* getgroupcommand.h */,
                                A7DA205A113FECD400BF472F /* getgroupcommand.cpp */,
+                               A75103D6128AE34100ABB831 /* getgroupscommand.h */,
+                               A75103D7128AE34100ABB831 /* getgroupscommand.cpp */,
                                A7DA205D113FECD400BF472F /* getlabelcommand.h */,
                                A7DA205C113FECD400BF472F /* getlabelcommand.cpp */,
                                A787A24F124CB46C0076EB84 /* getlineagecommand.h */,
                                A787A250124CB46C0076EB84 /* getlineagecommand.cpp */,
                                A7DA205F113FECD400BF472F /* getlistcountcommand.h */,
                                A7DA205E113FECD400BF472F /* getlistcountcommand.cpp */,
+                               A7510407128AFCC100ABB831 /* getotuscommand.h */,
+                               A7510408128AFCC100ABB831 /* getotuscommand.cpp */,
                                A7DA2061113FECD400BF472F /* getoturepcommand.h */,
                                A7DA2060113FECD400BF472F /* getoturepcommand.cpp */,
                                A7DA2063113FECD400BF472F /* getrabundcommand.h */,
                                A751032B128AC93F00ABB831 /* removegroupscommand.cpp */,
                                A787A28E124CE1470076EB84 /* removelineagecommand.h */,
                                A787A28F124CE1470076EB84 /* removelineagecommand.cpp */,
+                               A76D1451128D6A03005D4DFE /* removeotuscommand.h */,
+                               A76D1452128D6A03005D4DFE /* removeotuscommand.cpp */,
                                A7DA20F3113FECD400BF472F /* removeseqscommand.h */,
                                A7DA20F2113FECD400BF472F /* removeseqscommand.cpp */,
                                A7DA20F5113FECD400BF472F /* reversecommand.h */,
index 1f77b71bfb63b0519527bac53cfbf51511fdbb68..8d038e0c08e6010d21188808b3ff3ad40087a7f7 100644 (file)
@@ -99,7 +99,8 @@
 #include "subsamplecommand.h"
 #include "removegroupscommand.h"
 #include "getgroupscommand.h"
-
+#include "getotuscommand.h"
+#include "removeotuscommand.h"
 
 /*******************************************************/
 
@@ -203,6 +204,8 @@ CommandFactory::CommandFactory(){
        commands["sub.sample"]                  = "sub.sample";
        commands["remove.groups"]               = "remove.groups";
        commands["get.groups"]                  = "get.groups";
+       commands["get.otus"]                    = "get.otus";
+       commands["remove.otus"]                 = "remove.otus";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -347,6 +350,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "remove.lineage")                {       command = new RemoveLineageCommand(optionString);                       }
                else if(commandName == "get.groups")                    {       command = new GetGroupsCommand(optionString);                           }
                else if(commandName == "remove.groups")                 {       command = new RemoveGroupsCommand(optionString);                        }
+               else if(commandName == "get.otus")                              {       command = new GetOtusCommand(optionString);                                     }
+               else if(commandName == "remove.otus")                   {       command = new RemoveOtusCommand(optionString);                          }
                else if(commandName == "fastq.info")                    {       command = new ParseFastaQCommand(optionString);                         }
                else if(commandName == "pipeline.pds")                  {       command = new PipelineCommand(optionString);                            }
                else if(commandName == "deunique.seqs")                 {       command = new DeUniqueSeqsCommand(optionString);                        }
@@ -465,6 +470,8 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "get.groups")                    {       pipecommand = new GetGroupsCommand(optionString);                               }
                else if(commandName == "remove.lineage")                {       pipecommand = new RemoveLineageCommand(optionString);                   }
                else if(commandName == "remove.groups")                 {       pipecommand = new RemoveGroupsCommand(optionString);                    }
+               else if(commandName == "get.otus")                              {       pipecommand = new GetOtusCommand(optionString);                                 }
+               else if(commandName == "remove.otus")                   {       pipecommand = new RemoveOtusCommand(optionString);                              }
                else if(commandName == "fastq.info")                    {       pipecommand = new ParseFastaQCommand(optionString);                             }
                else if(commandName == "deunique.seqs")                 {       pipecommand = new DeUniqueSeqsCommand(optionString);                    }
                else if(commandName == "pairwise.seqs")                 {       pipecommand = new PairwiseSeqsCommand(optionString);                    }
@@ -570,6 +577,8 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "remove.lineage")                {       shellcommand = new RemoveLineageCommand();                      }
                else if(commandName == "get.groups")                    {       shellcommand = new GetGroupsCommand();                          }
                else if(commandName == "remove.groups")                 {       shellcommand = new RemoveGroupsCommand();                       }
+               else if(commandName == "get.otus")                              {       shellcommand = new GetOtusCommand();                            }
+               else if(commandName == "remove.otus")                   {       shellcommand = new RemoveOtusCommand();                         }
                else if(commandName == "fastq.info")                    {       shellcommand = new ParseFastaQCommand();                        }
                else if(commandName == "deunique.seqs")                 {       shellcommand = new DeUniqueSeqsCommand();                       }
                else if(commandName == "pairwise.seqs")                 {       shellcommand = new PairwiseSeqsCommand();                       }
diff --git a/getotuscommand.cpp b/getotuscommand.cpp
new file mode 100644 (file)
index 0000000..ba9a5e7
--- /dev/null
@@ -0,0 +1,417 @@
+/*
+ *  getotuscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/10/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "getotuscommand.h"
+#include "inputdata.h"
+#include "sharedutilities.h"
+
+
+//**********************************************************************************************************************
+vector<string> GetOtusCommand::getValidParameters(){   
+       try {
+               string Array[] =  { "group", "accnos","label", "groups","list","outputdir","inputdir" };
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+GetOtusCommand::GetOtusCommand(){      
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["group"] = tempOutNames;
+               outputTypes["list"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOtusCommand::getRequiredParameters(){        
+       try {
+               string Array[] =  {"group","label", "list"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> GetOtusCommand::getRequiredFiles(){     
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+//Builds the command from the user's option string.
+//  - "help" prints the help text and sets abort.
+//  - otherwise every parameter name is validated, the accnos/list/group file
+//    names are prefixed with inputdir when they carry no path of their own,
+//    and the member settings (accnosfile, groupfile, listfile, groups/Groups,
+//    label, outputDir) are filled in.
+//abort is set to true whenever a parameter is invalid, validFile reports
+//"not open", a required parameter (group, list, label) is missing, or
+//neither an accnos file nor a groups list was supplied.
+GetOtusCommand::GetOtusCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid parameters for this command
+                       string Array[] =  { "group", "accnos","label", "groups", "list","outputdir","inputdir" };
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       
+                       
+                       //optional outputdir parameter; an empty outputDir means none was given
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //optional inputdir parameter; when given it is prepended to any file name that has no path
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("accnos");
+                               //user has given an accnos file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("list");
+                               //user has given a list file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a group file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                       }
+                       
+                       
+                       //accnos is optional here; it is only required when no groups are given (checked at the end)
+                       accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { abort = true; }
+                       else if (accnosfile == "not found") {  accnosfile = ""; }       
+                       
+                       //group file is required
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { abort = true; }
+                       else if (groupfile == "not found") {  groupfile = "";  m->mothurOut("You must provide a group file."); m->mothurOutEndLine(); abort = true; }   
+                       
+                       //list file is required
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { abort = true; }
+                       else if (listfile == "not found") {  listfile = ""; m->mothurOut("You must provide a list file."); m->mothurOutEndLine(); abort = true; }       
+                       
+                       //groups is a dash separated list that is split into the Groups vector
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               m->splitAtDash(groups, Groups);
+                       }
+                       
+                       //label is required
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
+                       
+                       //the groups to select must come from somewhere: an accnos file or the groups parameter
+                       if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "GetOtusCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void GetOtusCommand::help(){
+       try {
+               m->mothurOut("The get.otus command selects otus containing sequences from a specfic group or set of groups.\n");
+               m->mothurOut("It outputs a new list file containing the otus containing sequences from in the those specified groups.\n");
+               m->mothurOut("The get.otus command parameters are accnos, group, list, label and groups. The group, list and label parameters are required.\n");
+               m->mothurOut("You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n");
+               m->mothurOut("The label parameter allows you to specify which distance you want to process.\n");
+               m->mothurOut("The get.otus command should be in the following format: get.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n");
+               m->mothurOut("Example get.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n");
+               m->mothurOut("or get.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int GetOtusCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               groupMap = new GroupMap(groupfile);
+               groupMap->readMap();
+               
+               //get groups you want to remove
+               if (accnosfile != "") { readAccnos(); }
+               
+               //make sure groups are valid
+               //takes care of user setting groupNames that are invalid or setting groups=all
+               SharedUtil* util = new SharedUtil();
+               util->setGroups(Groups, groupMap->namesOfGroups);
+               delete util;
+               
+               if (m->control_pressed) { delete groupMap; return 0; }
+               
+               //read through the list file keeping any otus that contain any sequence from the groups selected
+               readListGroup();
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
+                               
+               if (outputNames.size() != 0) {
+                       m->mothurOutEndLine();
+                       m->mothurOut("Output File names: "); m->mothurOutEndLine();
+                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+                       m->mothurOutEndLine();
+               }
+               
+               return 0;               
+       }
+       
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtusCommand::readListGroup(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               string GroupOutputDir = outputDir;
+               if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
+               string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
+               
+               ofstream outGroup;
+               m->openOutputFile(outputGroupFileName, outGroup);
+                       
+               InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               bool wroteSomething = false;
+
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       
+                       if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
+                       
+                       if(labels.count(list->getLabel()) == 1){
+                               processList(list, groupMap, out, outGroup, wroteSomething);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list; 
+                               
+                               list = input->getListVector(lastLabel);
+                               
+                               processList(list, groupMap, out, outGroup, wroteSomething);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = list->getLabel();
+                       
+                       delete list; list = NULL;
+                       
+                       //get next line to process
+                       list = input->getListVector();                          
+               }
+               
+               
+               if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) { delete list; }
+                       
+                       list = input->getListVector(lastLabel);
+                       
+                       processList(list, groupMap, out, outGroup, wroteSomething);
+                       
+                       delete list; list = NULL;
+               }
+                                       
+               out.close();
+               outGroup.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file does NOT contain any otus containing sequences from the groups you wish to get."); m->mothurOutEndLine();  }
+               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "readList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int GetOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
+       try {
+               
+               //make a new list vector
+               ListVector newList;
+               newList.setLabel(list->getLabel());
+               
+               int numOtus = 0;
+               //for each bin
+               for (int i = 0; i < list->getNumBins(); i++) {
+                       if (m->control_pressed) { return 0; }
+                       
+                       //parse out names that are in accnos file
+                       string binnames = list->get(i);
+                       
+                       bool keepBin = false;
+                       string groupFileOutput = "";
+                       
+                       //parse names
+                       string individual = "";
+                       int length = binnames.length();
+                       for(int j=0;j<length;j++){
+                               if(binnames[j] == ','){
+                                       string group = groupMap->getGroup(individual);
+                                       if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                                       
+                                       if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
+                                       groupFileOutput += individual + "\t" + group + "\n";
+                                       individual = "";        
+                                       
+                               }
+                               else{  individual += binnames[j];  }
+                       }
+                       
+                       string group = groupMap->getGroup(individual);
+                       if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                       
+                       if (m->inUsersGroups(group, Groups)) {  keepBin = true; }
+                       groupFileOutput += individual + "\t" + group + "\n";
+                       
+                       //if there are sequences from the groups we want in this bin add to new list, output to groupfile
+                       if (keepBin) {  
+                               newList.push_back(binnames);    
+                               outGroup << groupFileOutput;
+                               numOtus++;
+                       }
+               }
+               
+               //print new listvector
+               if (newList.getNumBins() != 0) {
+                       wroteSomething = true;
+                       newList.print(out);
+               }
+               
+               m->mothurOut(newList.getLabel() + " - selected " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
+       
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "processList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void GetOtusCommand::readAccnos(){
+       try {
+               Groups.clear();
+               
+               ifstream in;
+               m->openInputFile(accnosfile, in);
+               string name;
+               
+               while(!in.eof()){
+                       in >> name;
+                       
+                       Groups.push_back(name);
+                       
+                       m->gobble(in);
+               }
+               in.close();             
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOtusCommand", "readAccnos");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
diff --git a/getotuscommand.h b/getotuscommand.h
new file mode 100644 (file)
index 0000000..ec69acb
--- /dev/null
@@ -0,0 +1,49 @@
+#ifndef GETOTUSCOMMAND_H
+#define GETOTUSCOMMAND_H
+
+/*
+ *  getotuscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/10/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+
+#include "command.hpp"
+#include "groupmap.h"
+#include "listvector.hpp"
+
+//Command class behind get.otus (per the commit description). Only the tail of its
+//implementation is visible alongside this header, so notes on members whose code is
+//not shown are marked as assumptions.
+class GetOtusCommand : public Command {
+       
+public:
+       
+       //constructs the command from an option string - presumably parsed the same way as
+       //RemoveOtusCommand(string); TODO confirm against getotuscommand.cpp
+       GetOtusCommand(string); 
+       GetOtusCommand();
+       ~GetOtusCommand(){}
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       //returns a copy of the outputTypes map
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();
+       void help();    
+       
+private:
+       //accnosfile is the file readAccnos() opens; the other strings are presumably the
+       //matching command parameters - verify in the constructor
+       string accnosfile, groupfile, listfile, outputDir, groups, label;
+       bool abort;
+       //Groups is cleared and refilled by readAccnos()
+       vector<string> outputNames, Groups;
+       //returned by getOutputFiles()
+       map<string, vector<string> > outputTypes;
+       //NOTE(review): raw pointer and the destructor above is empty - confirm who frees it
+       GroupMap* groupMap;
+       
+       //reads whitespace-separated names from accnosfile into Groups
+       void readAccnos();
+       int readListGroup();
+       //prints the filtered list vector when it has at least one bin, sets the bool flag in that
+       //case, and logs how many OTUs were selected (from the visible tail of the function)
+       int processList(ListVector*&, GroupMap*&, ofstream&, ofstream&, bool&);
+       
+};
+
+#endif
+
+
+
index c427db5b385c166176f3f3a5272df2f72c3400b8..12be82690fc7068e4d713353ee84d79aa6330ba7 100644 (file)
@@ -13,7 +13,7 @@
 //**********************************************************************************************************************
 vector<string> MakeGroupCommand::getValidParameters(){ 
        try {
-               string Array[] =  {"fasta", "groups","outputdir","inputdir"};
+               string Array[] =  {"fasta", "output","groups","outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
        }
@@ -71,7 +71,7 @@ MakeGroupCommand::MakeGroupCommand(string option)  {
                else {
                        
                        //valid paramters for this command
-                       string AlignArray[] =  {"fasta","groups","outputdir","inputdir"};
+                       string AlignArray[] =  {"fasta","groups","output","outputdir","inputdir"};
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -92,8 +92,6 @@ MakeGroupCommand::MakeGroupCommand(string option)  {
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
-                       
-                       filename = outputDir;
 
                        fastaFileName = validParameter.validFile(parameters, "fasta", false);
                        if (fastaFileName == "not found") { m->mothurOut("fasta is a required parameter for the make.group command."); m->mothurOutEndLine(); abort = true;  }
@@ -155,13 +153,17 @@ MakeGroupCommand::MakeGroupCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
                        
+                       output = validParameter.validFile(parameters, "output", false);                 
+                       if (output == "not found") { output = "";  }
+                       else{ filename = output; }
+                       
                        groups = validParameter.validFile(parameters, "groups", false);                 
                        if (groups == "not found") { m->mothurOut("groups is a required parameter for the make.group command."); m->mothurOutEndLine(); abort = true;  }
                        else { m->splitAtDash(groups, groupsNames);     }
 
                        if (groupsNames.size() != fastaFileNames.size()) { m->mothurOut("You do not have the same number of valid fastfile files as groups.  This could be because we could not open a fastafile."); m->mothurOutEndLine(); abort = true;  }
                }
-               
+               cout << "here" << endl;
        }
        catch(exception& e) {
                m->errorOut(e, "MakeGroupCommand", "MakeGroupCommand");
@@ -178,9 +180,10 @@ MakeGroupCommand::~MakeGroupCommand(){     }
 void MakeGroupCommand::help(){
        try {
                m->mothurOut("The make.group command reads a fasta file or series of fasta files and creates a groupfile.\n");
-               m->mothurOut("The make.group command parameters are fasta and groups, both are required.\n");
+               m->mothurOut("The make.group command parameters are fasta, groups and output. Fasta and group are required.\n");
+               m->mothurOut("The output parameter allows you to specify the name of groupfile created. \n");
                m->mothurOut("The make.group command should be in the following format: \n");
-               m->mothurOut("make.group(fasta=yourFastaFiles, groups=yourGroups. \n");
+               m->mothurOut("make.group(fasta=yourFastaFiles, groups=yourGroups). \n");
                m->mothurOut("Example make.group(fasta=seqs1.fasta-seq2.fasta-seqs3.fasta, groups=A-B-C)\n");
                m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFiles).\n\n");
        }
@@ -195,10 +198,10 @@ void MakeGroupCommand::help(){
 
 int MakeGroupCommand::execute(){
        try {
-               if (abort == true) {    return 0;       }
+               if (abort == true) { return 0;  }
                
                if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[0]); }
-               
+                       
                filename = outputDir + filename;
                
                ofstream out;
index 260deaa5637a580c24d6be89b1c8b822ba262591..e836d20f50b7b04e6cedefbb4f01616f6380b538 100644 (file)
@@ -27,7 +27,7 @@ public:
        
 private:
                
-       string fastaFileName, groups, outputDir, filename;
+       string fastaFileName, groups, outputDir, filename, output;
        vector<string> fastaFileNames;
        vector<string> groupsNames, outputNames;
        map<string, vector<string> > outputTypes;
index b8db243e9f04a05d84e23e2651e62429a4f98387..81ffb00347de5f51795cf74e6c605e664cc80f99 100644 (file)
@@ -186,9 +186,12 @@ int metastat_main (char* outputFileName, int numRows, int numCols, double thresh
        
        int *nr, *nc, *ldtabl, *work;
        int nrow=2, ncol=2, ldtable=2;
-       int workspace=(row*sizeof(double *)+size*sizeof(double *));
+       int workspace = 2*(row*col*sizeof(double *)); 
        double *expect, *prc, *emin,*prt,*pre;
        double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
+         
+       prt = (double *) malloc(size*sizeof(double *));
+       prc = (double *) malloc(size*sizeof(double *));
 
        nr = &nrow;
        nc = &ncol;
@@ -258,7 +261,7 @@ printf("here before testp\n");
        double data[] = {f11, f12, f21, f22};
 
        int *nr, *nc, *ldtabl, *work;
-       int nrow=2, ncol=2, ldtable=2, workspace=10000000; // I added two zeros for larger data sets
+       int nrow=2, ncol=2, ldtable=2, workspace=INT_MAX; // I added two zeros for larger data sets
        double *expect, *prc, *emin,*prt,*pre;
        double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
 
diff --git a/mothur b/mothur
index bcb20338fba94edc9ea5135c57f3640ae061377d..b122ea07aece410563672cbc0d00719fa614c608 100755 (executable)
Binary files a/mothur and b/mothur differ
index bd1a098c8d5662044552071c7c44a273214018ce..b1d818850bd974ff482bc8c88ffc4bdc0a3dc7bd 100644 (file)
@@ -1462,20 +1462,21 @@ bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string
                }
                
                //go through users set and make them floats
-               for(it = userLabels.begin(); it != userLabels.end(); ++it) {
+               for(it = userLabels.begin(); it != userLabels.end();) {
                        
                        float temp;
                        if ((*it != "unique") && (convertTestFloat(*it, temp) == true)){
                                convert(*it, temp);
                                orderFloat.push_back(temp);
                                userMap[*it] = temp;
+                               it++;
                        }else if (*it == "unique") { 
                                orderFloat.push_back(-1.0);
                                userMap["unique"] = -1.0;
+                               it++;
                        }else {
-                               if (errorOff == "") {  cout << *it << " is not a valid label." << endl;  }
-                               userLabels.erase(*it); 
-                               it--;
+                               if (errorOff == "") {  mothurOut(*it + " is not a valid label."); mothurOutEndLine();  }
+                               userLabels.erase(it++); 
                        }
                }
                
@@ -1491,11 +1492,11 @@ bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string
                        if (orderFloat[i] < labelFloat) {
                                smaller = true;
                                if (orderFloat[i] == -1) { 
-                                       if (errorOff == "") { cout << "Your file does not include the label unique." << endl; }
+                                       if (errorOff == "") { mothurOut("Your file does not include the label unique."); mothurOutEndLine(); }
                                        userLabels.erase("unique");
                                }
                                else {  
-                                       if (errorOff == "") { cout << "Your file does not include the label " << endl; }
+                                       if (errorOff == "") { mothurOut("Your file does not include the label "); mothurOutEndLine(); }
                                        string s = "";
                                        for (it2 = userMap.begin(); it2!= userMap.end(); it2++) {  
                                                if (it2->second == orderFloat[i]) {  
@@ -1505,7 +1506,7 @@ bool MothurOut::anyLabelsToProcess(string label, set<string>& userLabels, string
                                                        break;
                                                }
                                        }
-                                       if (errorOff == "") {cout << s <<  ". I will use the next smallest distance. " << endl; }
+                                       if (errorOff == "") {mothurOut( s +  ". I will use the next smallest distance. "); mothurOutEndLine(); }
                                }
                        //since they are sorted once you find a bigger one stop looking
                        }else { break; }
diff --git a/removeotuscommand.cpp b/removeotuscommand.cpp
new file mode 100644 (file)
index 0000000..6828c72
--- /dev/null
@@ -0,0 +1,419 @@
+/*
+ *  removeotuscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 11/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "removeotuscommand.h"
+#include "inputdata.h"
+#include "sharedutilities.h"
+
+
+//**********************************************************************************************************************
+vector<string> RemoveOtusCommand::getValidParameters(){        
+       try {
+               string Array[] =  { "group", "accnos","label", "groups","list","outputdir","inputdir" };
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+RemoveOtusCommand::RemoveOtusCommand(){        
+       try {
+               abort = true;
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["group"] = tempOutNames;
+               outputTypes["list"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> RemoveOtusCommand::getRequiredParameters(){     
+       try {
+               string Array[] =  {"group","label", "list"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> RemoveOtusCommand::getRequiredFiles(){  
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+RemoveOtusCommand::RemoveOtusCommand(string option)  {
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  { "group", "accnos","label", "groups", "list","outputdir","inputdir" };
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string,string>::iterator it;
+                       
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       outputTypes["list"] = tempOutNames;
+                       
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("accnos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
+                               }
+                               
+                               it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                       }
+                       
+                       
+                       //check for required parameters
+                       accnosfile = validParameter.validFile(parameters, "accnos", true);
+                       if (accnosfile == "not open") { abort = true; }
+                       else if (accnosfile == "not found") {  accnosfile = ""; }       
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { abort = true; }
+                       else if (groupfile == "not found") {  groupfile = "";  m->mothurOut("You must provide a group file."); m->mothurOutEndLine(); abort = true; }   
+                       
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not open") { abort = true; }
+                       else if (listfile == "not found") {  listfile = ""; m->mothurOut("You must provide a list file."); m->mothurOutEndLine(); abort = true; }       
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);                 
+                       if (groups == "not found") { groups = ""; }
+                       else { 
+                               m->splitAtDash(groups, Groups);
+                       }
+                       
+                       label = validParameter.validFile(parameters, "label", false);                   
+                       if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
+                       
+                       if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "RemoveOtusCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void RemoveOtusCommand::help(){
+       try {
+               m->mothurOut("The remove.otus command removes otus containing sequences from a specfic group or set of groups.\n");
+               m->mothurOut("It outputs a new list file containing the otus containing sequences NOT from in the those specified groups.\n");
+               m->mothurOut("The remove.otus command parameters are accnos, group, list, label and groups. The group, list and label parameters are required.\n");
+               m->mothurOut("You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n");
+               m->mothurOut("The label parameter allows you to specify which distance you want to process.\n");
+               m->mothurOut("The remove.otus command should be in the following format: remove.otus(accnos=yourAccnos, list=yourListFile, group=yourGroupFile, label=yourLabel).\n");
+               m->mothurOut("Example remove.otus(accnos=amazon.accnos, list=amazon.fn.list, group=amazon.groups, label=0.03).\n");
+               m->mothurOut("or remove.otus(groups=pasture, list=amazon.fn.list, amazon.groups, label=0.03).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListFile).\n\n");
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "help");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int RemoveOtusCommand::execute(){
+       try {
+               
+               if (abort == true) { return 0; }
+               
+               groupMap = new GroupMap(groupfile);
+               groupMap->readMap();
+               
+               //get groups you want to remove
+               if (accnosfile != "") { readAccnos(); }
+               
+               //make sure groups are valid
+               //takes care of user setting groupNames that are invalid or setting groups=all
+               SharedUtil* util = new SharedUtil();
+               util->setGroups(Groups, groupMap->namesOfGroups);
+               delete util;
+               
+               if (m->control_pressed) { delete groupMap; return 0; }
+               
+               //read through the list file keeping any otus that contain any sequence from the groups selected
+               readListGroup();
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
+               
+               if (outputNames.size() != 0) {
+                       m->mothurOutEndLine();
+                       m->mothurOut("Output File names: "); m->mothurOutEndLine();
+                       for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+                       m->mothurOutEndLine();
+               }
+               
+               return 0;               
+       }
+       
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtusCommand::readListGroup(){
+       try {
+               string thisOutputDir = outputDir;
+               if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
+               string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick." + label +  m->getExtension(listfile);
+               
+               ofstream out;
+               m->openOutputFile(outputFileName, out);
+               
+               string GroupOutputDir = outputDir;
+               if (outputDir == "") {  GroupOutputDir += m->hasPath(groupfile);  }
+               string outputGroupFileName = GroupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick." + label  + m->getExtension(groupfile);
+               
+               ofstream outGroup;
+               m->openOutputFile(outputGroupFileName, outGroup);
+               
+               InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               bool wroteSomething = false;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       
+                       if (m->control_pressed) {  delete list; delete input; out.close();  outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str());return 0;  }
+                       
+                       if(labels.count(list->getLabel()) == 1){
+                               processList(list, groupMap, out, outGroup, wroteSomething);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                       }
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list; 
+                               
+                               list = input->getListVector(lastLabel);
+                               
+                               processList(list, groupMap, out, outGroup, wroteSomething);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                       }
+                       
+                       lastLabel = list->getLabel();
+                       
+                       delete list; list = NULL;
+                       
+                       //get next line to process
+                       list = input->getListVector();                          
+               }
+               
+               
+               if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; out.close(); outGroup.close(); remove(outputFileName.c_str());  remove(outputGroupFileName.c_str()); return 0;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       if (list != NULL) { delete list; }
+                       
+                       list = input->getListVector(lastLabel);
+                       
+                       processList(list, groupMap, out, outGroup, wroteSomething);
+                       
+                       delete list; list = NULL;
+               }
+               
+               out.close();
+               outGroup.close();
+               
+               if (wroteSomething == false) {  m->mothurOut("At distance " + label + " your file ONLY contains otus containing sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
+               outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
+               outputTypes["group"].push_back(outputGroupFileName); outputNames.push_back(outputGroupFileName);
+               
+               return 0;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "readList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int RemoveOtusCommand::processList(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup, bool& wroteSomething){
+       try {
+               
+               //make a new list vector
+               ListVector newList;
+               newList.setLabel(list->getLabel());
+               
+               int numOtus = 0;
+               //for each bin
+               for (int i = 0; i < list->getNumBins(); i++) {
+                       if (m->control_pressed) { return 0; }
+                       
+                       //parse out names that are in accnos file
+                       string binnames = list->get(i);
+                       
+                       bool removeBin = false;
+                       string groupFileOutput = "";
+                       
+                       //parse names
+                       string individual = "";
+                       int length = binnames.length();
+                       for(int j=0;j<length;j++){
+                               if(binnames[j] == ','){
+                                       string group = groupMap->getGroup(individual);
+                                       if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                                       
+                                       if (m->inUsersGroups(group, Groups)) {  removeBin = true; break; }
+                                       groupFileOutput += individual + "\t" + group + "\n";
+                                       individual = "";        
+                                       
+                               }
+                               else{  individual += binnames[j];  }
+                       }
+                       
+                       if (!removeBin) { 
+                               //get last name
+                               string group = groupMap->getGroup(individual);
+                               if (group == "not found") { m->mothurOut("[ERROR]: " + individual + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
+                               
+                               if (m->inUsersGroups(group, Groups)) {  removeBin = true; }
+                               groupFileOutput += individual + "\t" + group + "\n";                            
+                               
+                               //if there are no sequences from the groups we want to remove in this bin add to new list, output to groupfile
+                               newList.push_back(binnames);    
+                               outGroup << groupFileOutput;
+                               numOtus++;
+                       }
+               }
+               
+               //print new listvector
+               if (newList.getNumBins() != 0) {
+                       wroteSomething = true;
+                       newList.print(out);
+               }
+               
+               m->mothurOut(newList.getLabel() + " - removed " + toString(numOtus) + " of the " + toString(list->getNumBins()) + " OTUs."); m->mothurOutEndLine();
+               
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "processList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void RemoveOtusCommand::readAccnos(){
+       try {
+               Groups.clear();
+               
+               ifstream in;
+               m->openInputFile(accnosfile, in);
+               string name;
+               
+               while(!in.eof()){
+                       in >> name;
+                       
+                       Groups.push_back(name);
+                       
+                       m->gobble(in);
+               }
+               in.close();             
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "RemoveOtusCommand", "readAccnos");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+
+
diff --git a/removeotuscommand.h b/removeotuscommand.h
new file mode 100644 (file)
index 0000000..ce1646b
--- /dev/null
@@ -0,0 +1,50 @@
+#ifndef REMOVEOTUSCOMMAND_H
+#define REMOVEOTUSCOMMAND_H
+
+/*
+ *  removeotuscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 11/12/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+
+#include "command.hpp"
+#include "groupmap.h"
+#include "listvector.hpp"
+
+class RemoveOtusCommand : public Command {
+       //presumably the command object behind remove.otus (added in the same commit) - confirm in commandfactory.cpp
+public:
+       
+       RemoveOtusCommand(string);      //NOTE(review): argument is presumably the user's option string - confirm
+       RemoveOtusCommand();
+       ~RemoveOtusCommand(){}  //empty body: groupMap is not deleted here
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }   //returns outputTypes by value
+       int execute();
+       void help();    
+       
+private:
+       string accnosfile, groupfile, listfile, outputDir, groups, label;
+       bool abort;
+       vector<string> outputNames, Groups;     //Groups is refilled by readAccnos()
+       map<string, vector<string> > outputTypes;
+       GroupMap* groupMap;
+       
+       void readAccnos();      //clears Groups, then appends each name read from accnosfile
+       int readListGroup();
+       int processList(ListVector*&, GroupMap*&, ofstream&, ofstream&, bool&);
+       //processList: presumably writes the kept bins and their group lines to the two streams - confirm against the definition
+};
+
+#endif
+
+
+
+
index 24c7b548946a0715e49a7d3818142d50032691fc..b80a3abb4bb76bd0a5da3afa5548df0771797e45 100644 (file)
@@ -31,6 +31,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(){
                outputTypes["unweighted"] = tempOutNames;
                outputTypes["uwsummary"] = tempOutNames;
                outputTypes["phylip"] = tempOutNames;
+               outputTypes["column"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "UnifracUnweightedCommand", "UnifracUnweightedCommand");
@@ -91,6 +92,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        outputTypes["unweighted"] = tempOutNames;
                        outputTypes["uwsummary"] = tempOutNames;
                        outputTypes["phylip"] = tempOutNames;
+                       outputTypes["column"] = tempOutNames;
                        
                        if (globaldata->gTree.size() == 0) {//no trees were read
                                m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.unweighted command."); m->mothurOutEndLine(); abort = true;  }
@@ -113,8 +115,12 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option)  {
                        itersString = validParameter.validFile(parameters, "iters", false);                             if (itersString == "not found") { itersString = "1000"; }
                        convert(itersString, iters); 
                        
-                       string temp = validParameter.validFile(parameters, "distance", false);                  if (temp == "not found") { temp = "false"; }
-                       phylip = m->isTrue(temp);
+                       string temp = validParameter.validFile(parameters, "distance", false);                  
+                       if (temp == "not found") { phylip = false; outputForm = ""; }
+                       else{
+                               if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
+                               else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
+                       }
                        
                        temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "f"; }
                        random = m->isTrue(temp);
@@ -165,7 +171,7 @@ void UnifracUnweightedCommand::help(){
                m->mothurOut("The unifrac.unweighted command parameters are groups, iters, distance, processors and random.  No parameters are required.\n");
                m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed.  You must enter at least 1 valid group.\n");
                m->mothurOut("The group names are separated by dashes.  The iters parameter allows you to specify how many random trees you would like compared to your tree.\n");
-               m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false.\n");
+               m->mothurOut("The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n");
                m->mothurOut("The random parameter allows you to shut off the comparison to random trees. The default is false, meaning compare don't your trees with randomly generated trees.\n");
                m->mothurOut("The processors parameter allows you to specify the number of processors to use. The default is 1.\n");
                m->mothurOut("The unifrac.unweighted command should be in the following format: unifrac.unweighted(groups=yourGroups, iters=yourIters).\n");
@@ -374,15 +380,23 @@ void UnifracUnweightedCommand::printUWSummaryFile(int i) {
 /***********************************************************/
 void UnifracUnweightedCommand::createPhylipFile(int i) {
        try {
-               string phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted.dist";
-               outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
+               string phylipFileName;
+               if ((outputForm == "lt") || (outputForm == "square")) {
+                       phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted.phylip.dist";
+                       outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
+               }else { //column
+                       phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".unweighted.column.dist";
+                       outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
+               }
                
                ofstream out;
                m->openOutputFile(phylipFileName, out);
-                       
-               //output numSeqs
-               out << globaldata->Groups.size() << endl;
-                       
+               
+               if ((outputForm == "lt") || (outputForm == "square")) {
+                       //output numSeqs
+                       out << globaldata->Groups.size() << endl;
+               }
+               
                //make matrix with scores in it
                vector< vector<float> > dists;  dists.resize(globaldata->Groups.size());
                for (int i = 0; i < globaldata->Groups.size(); i++) {
@@ -406,11 +420,30 @@ void UnifracUnweightedCommand::createPhylipFile(int i) {
                        if (name.length() < 10) { //pad with spaces to make compatible
                                while (name.length() < 10) {  name += " ";  }
                        }
-                       out << name << '\t';
                        
-                       //output distances
-                       for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
-                       out << endl;
+                       if (outputForm == "lt") {
+                               out << name << '\t';
+                       
+                               //output distances
+                               for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
+                               out << endl;
+                       }else if (outputForm == "square") {
+                               out << name << '\t';
+                               
+                               //output distances
+                               for (int l = 0; l < globaldata->Groups.size(); l++) {   out  << dists[r][l] << '\t';  }
+                               out << endl;
+                       }else{
+                               //column format: one "name otherName distance" row per pair below the diagonal
+                               for (int l = 0; l < r; l++) {   
+                                       string otherName = globaldata->Groups[l];
+                                       if (otherName.length() < 10) { //pad with spaces to make compatible
+                                               while (otherName.length() < 10) {  otherName += " ";  }
+                                       }
+                                       //tab after otherName as well: names of 10+ characters get no padding and would run into the distance
+                                       out  << name << '\t' << otherName << '\t' << dists[r][l] << endl;  
+                               }
+                       }
                }
                out.close();
        }
index 91f005c2500b9011b9149f3b7a18996c00af5c6f..a1f2bf37480acc446b94e2f96ab6647b5c9e07a8 100644 (file)
@@ -51,7 +51,7 @@ class UnifracUnweightedCommand : public Command {
                vector< map<float, float> > rCumul;  //map <unweighted score, cumulative percentage of number of random trees with that score or higher.> -vector entry for each combination.
                
                bool abort, phylip, random;
-               string groups, itersString, outputDir;
+               string groups, itersString, outputDir, outputForm;
                vector<string> Groups, outputNames; //holds groups to be used
                map<string, vector<string> > outputTypes;
 
index df73c9389b99b7ba18725d418d62b36184efa398..4ded9c51d399de1bf07950da88e0cb3a19e333d0 100644 (file)
@@ -30,6 +30,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(){
                outputTypes["weighted"] = tempOutNames;
                outputTypes["wsummary"] = tempOutNames;
                outputTypes["phylip"] = tempOutNames;
+               outputTypes["column"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "UnifracWeightedCommand", "UnifracWeightedCommand");
@@ -90,6 +91,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        outputTypes["weighted"] = tempOutNames;
                        outputTypes["wsummary"] = tempOutNames;
                        outputTypes["phylip"] = tempOutNames;
+                       outputTypes["column"] = tempOutNames;
                        
                        if (globaldata->gTree.size() == 0) {//no trees were read
                                m->mothurOut("You must execute the read.tree command, before you may execute the unifrac.weighted command."); m->mothurOutEndLine(); abort = true;  }
@@ -112,9 +114,13 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) {
                        itersString = validParameter.validFile(parameters, "iters", false);                     if (itersString == "not found") { itersString = "1000"; }
                        convert(itersString, iters); 
                        
-                       string temp = validParameter.validFile(parameters, "distance", false);                  if (temp == "not found") { temp = "false"; }
-                       phylip = m->isTrue(temp);
-               
+                       string temp = validParameter.validFile(parameters, "distance", false);                  
+                       if (temp == "not found") { phylip = false; outputForm = ""; }
+                       else{
+                               if ((temp == "lt") || (temp == "column") || (temp == "square")) {  phylip = true;  outputForm = temp; }
+                               else { m->mothurOut("Options for distance are: lt, square, or column. Using lt."); m->mothurOutEndLine(); phylip = true; outputForm = "lt"; }
+                       }
+                       
                        temp = validParameter.validFile(parameters, "random", false);                                   if (temp == "not found") { temp = "F"; }
                        random = m->isTrue(temp);
                        
@@ -497,15 +503,23 @@ void UnifracWeightedCommand::createPhylipFile() {
                //for each tree
                for (int i = 0; i < T.size(); i++) { 
                
-                       string phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted.dist";
-                       outputNames.push_back(phylipFileName);
-                       outputTypes["phylip"].push_back(phylipFileName);
+                       string phylipFileName;
+                       if ((outputForm == "lt") || (outputForm == "square")) {
+                               phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted.phylip.dist";
+                               outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); 
+                       }else { //column
+                               phylipFileName = outputDir + m->getSimpleName(globaldata->getTreeFile())  + toString(i+1) + ".weighted.column.dist";
+                               outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); 
+                       }
+                       
                        ofstream out;
                        m->openOutputFile(phylipFileName, out);
                        
-                       //output numSeqs
-                       out << globaldata->Groups.size() << endl;
-                       
+                       if ((outputForm == "lt") || (outputForm == "square")) {
+                               //output numSeqs
+                               out << globaldata->Groups.size() << endl;
+                       }
+
                        //make matrix with scores in it
                        vector< vector<float> > dists;  dists.resize(globaldata->Groups.size());
                        for (int i = 0; i < globaldata->Groups.size(); i++) {
@@ -528,11 +542,30 @@ void UnifracWeightedCommand::createPhylipFile() {
                                if (name.length() < 10) { //pad with spaces to make compatible
                                        while (name.length() < 10) {  name += " ";  }
                                }
-                               out << name << '\t';
                                
-                               //output distances
-                               for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
-                               out << endl;
+                               if (outputForm == "lt") {
+                                       out << name << '\t';
+                                       
+                                       //output distances
+                                       for (int l = 0; l < r; l++) {   out  << dists[r][l] << '\t';  }
+                                       out << endl;
+                               }else if (outputForm == "square") {
+                                       out << name << '\t';
+                                       
+                                       //output distances
+                                       for (int l = 0; l < globaldata->Groups.size(); l++) {   out  << dists[r][l] << '\t';  }
+                                       out << endl;
+                               }else{
+                                       //column format: one "name otherName distance" row per pair below the diagonal
+                                       for (int l = 0; l < r; l++) {   
+                                               string otherName = globaldata->Groups[l];
+                                               if (otherName.length() < 10) { //pad with spaces to make compatible
+                                                       while (otherName.length() < 10) {  otherName += " ";  }
+                                               }
+                                               //tab after otherName as well: names of 10+ characters get no padding and would run into the distance
+                                               out  << name << '\t' << otherName << '\t' << dists[r][l] << endl;  
+                                       }
+                               }
                        }
                        out.close();
                }
index 547e35235a24ac1ab169c918da544301aea7bd13..5bb690a854ed5c13f62a18fedc51cd39eee93f3a 100644 (file)
@@ -61,7 +61,7 @@ class UnifracWeightedCommand : public Command {
                map<float, float>  validScores;  //map contains scores from random
                
                bool abort, phylip, random;
-               string groups, itersString;
+               string groups, itersString, outputForm;
                vector<string> Groups, outputNames; //holds groups to be used
                map<string, vector<string> > outputTypes;
                int processors;