]> git.donarmstrong.com Git - mothur.git/commitdiff
added split.groups command
authorwestcott <westcott>
Tue, 21 Sep 2010 10:53:52 +0000 (10:53 +0000)
committerwestcott <westcott>
Tue, 21 Sep 2010 10:53:52 +0000 (10:53 +0000)
Mothur.xcodeproj/project.pbxproj
commandfactory.cpp
mothurout.cpp
splitgroupscommand.cpp [new file with mode: 0644]
splitgroupscommand.h [new file with mode: 0644]

index c59b63772f9f8b2cccda2ce0cd4f23bdaaa1b419..1ac6e0b5a1c87cfdc844e7137da940de80bca6bb 100644 (file)
                A7DF0AE2121EBB14004A03EA /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prng.h; sourceTree = "<group>"; };
                A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = "<group>"; };
                A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = "<group>"; };
+               A7F139481247C3CB0033324C /* splitgroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = splitgroupscommand.h; sourceTree = "<group>"; };
+               A7F139491247C3CB0033324C /* splitgroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splitgroupscommand.cpp; sourceTree = "<group>"; };
                A7F6C8E1124229F900299875 /* fisher2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fisher2.c; sourceTree = "<group>"; };
                A7F6C8E2124229F900299875 /* fisher2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fisher2.h; sourceTree = "<group>"; };
                A7F6C8E3124229F900299875 /* metastats2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = metastats2.c; sourceTree = "<group>"; };
                                A7DA210F113FECD400BF472F /* sharedcommand.cpp */,
                                A71D924511AEB42400D00CBC /* splitabundcommand.h */,
                                A71D924411AEB42400D00CBC /* splitabundcommand.cpp */,
+                               A7F139481247C3CB0033324C /* splitgroupscommand.h */,
+                               A7F139491247C3CB0033324C /* splitgroupscommand.cpp */,
                                A7DA2155113FECD400BF472F /* summarycommand.h */,
                                A7DA2154113FECD400BF472F /* summarycommand.cpp */,
                                A7DA2159113FECD400BF472F /* summarysharedcommand.h */,
index bbdd61692b7e328937d526e887ef5f0590528f2e..45f89060e62307a463a6637ec89913577f450b72 100644 (file)
@@ -87,6 +87,7 @@
 #include "seqerrorcommand.h"
 #include "normalizesharedcommand.h"
 #include "metastatscommand.h"
+#include "splitgroupscommand.h"
 
 /*******************************************************/
 
@@ -178,6 +179,7 @@ CommandFactory::CommandFactory(){
        commands["sffinfo"]                             = "sffinfo";
        commands["normalize.shared"]    = "normalize.shared";
        commands["metastats"]                   = "metastats";
+       commands["split.groups"]                = "split.groups";
        commands["classify.seqs"]               = "MPIEnabled"; 
        commands["dist.seqs"]                   = "MPIEnabled";
        commands["filter.seqs"]                 = "MPIEnabled";
@@ -312,6 +314,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "sffinfo")                               {       command = new SffInfoCommand(optionString);                                     }
                else if(commandName == "normalize.shared")              {       command = new NormalizeSharedCommand(optionString);                     }
                else if(commandName == "metastats")                             {       command = new MetaStatsCommand(optionString);                           }
+               else if(commandName == "split.groups")                  {       command = new SplitGroupCommand(optionString);                          }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
index 1231700a42e03b3d733e27141762a6c1e5708f00..aab9945230f680e4ce106e4266ff66ba5ee7c34e 100644 (file)
@@ -1182,7 +1182,20 @@ void MothurOut::getNumSeqs(ifstream& file, int& numSeqs){
 //This function parses the estimator options and puts them in a vector
 void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbol) {
        try {
-               string individual;
+               string individual = "";
+               int estimLength = estim.size();
+               for(int i=0;i<estimLength;i++){
+                       if(estim[i] == symbol){
+                               container.push_back(individual);
+                               individual = "";                                
+                       }
+                       else{
+                               individual += estim[i];
+                       }
+               }
+               container.push_back(individual);
+
+               /*
                
                while (estim.find_first_of(symbol) != -1) {
                        individual = estim.substr(0,estim.find_first_of(symbol));
@@ -1192,7 +1205,7 @@ void MothurOut::splitAtChar(string& estim, vector<string>& container, char symbo
                        }
                }
                //get last one
-               container.push_back(estim);
+               container.push_back(estim); */
        }
        catch(exception& e) {
                errorOut(e, "MothurOut", "splitAtChar");
diff --git a/splitgroupscommand.cpp b/splitgroupscommand.cpp
new file mode 100644 (file)
index 0000000..6f8788a
--- /dev/null
@@ -0,0 +1,280 @@
+/*
+ *  splitgroupscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 9/20/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "splitgroupscommand.h"
+#include "sharedutilities.h"
+
+//**********************************************************************************************************************
+SplitGroupCommand::SplitGroupCommand(string option)  {
+       try {
+               abort = false;
+                       
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"name","group","groups","fasta","outputdir","inputdir"}; 
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+               
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                                               
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+
+                       }
+
+                       
+                       namefile = validParameter.validFile(parameters, "name", true);
+                       if (namefile == "not open") { abort = true; }
+                       else if (namefile == "not found") { namefile = ""; }    
+               
+                       fastafile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastafile == "not open") { abort = true; }
+                       else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.group command. "); m->mothurOutEndLine(); abort = true;  }       
+                       
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") {  groupfile = ""; abort = true; } 
+                       else if (groupfile == "not found") { groupfile = ""; m->mothurOut("group is a required parameter for the split.group command. "); m->mothurOutEndLine(); abort = true; }
+                       
+                       groups = validParameter.validFile(parameters, "groups", false);         
+                       if (groups == "not found") { groups = ""; }
+                       else { m->splitAtDash(groups, Groups);  }
+                                               
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(groupfile);      }
+               }
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "SplitAbundCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+void SplitGroupCommand::help(){
+       try {
+               m->mothurOut("The split.group command reads a group file, and parses your fasta and names files by groups. \n");
+               m->mothurOut("The split.group command parameters are fasta, name, group and groups.\n");
+               m->mothurOut("The fasta and group parameters are required.\n");
+               m->mothurOut("The groups parameter allows you to select groups to create files for.  \n");
+               m->mothurOut("For example if you set groups=A-B-C, you will get a .A.fasta, .A.names, .B.fasta, .B.names, .C.fasta, .C.names files.  \n");
+               m->mothurOut("If you want .fasta and .names files for all groups, set groups=all.  \n");
+               m->mothurOut("The split.group command should be used in the following format: split.group(fasta=yourFasta, group=yourGroupFile).\n");
+               m->mothurOut("Example: split.group(fasta=abrecovery.fasta, group=abrecovery.groups).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "help");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+SplitGroupCommand::~SplitGroupCommand(){ }
+//**********************************************************************************************************************
+int SplitGroupCommand::execute(){
+       try {
+       
+               if (abort == true) {    return 0;       }
+               
+               groupMap = new GroupMap(groupfile);
+               groupMap->readMap();
+               
+               SharedUtil util;  util.setGroups(Groups, groupMap->namesOfGroups);  
+               
+               if (namefile != "") {  readNames();  }
+               splitFasta();
+               
+               delete groupMap;
+               
+               if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); } return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "execute");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+int SplitGroupCommand::readNames() { 
+       try {
+               //open input file
+               ifstream in;
+               m->openInputFile(namefile, in);
+               
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+                       string firstCol, secondCol;
+                       in >> firstCol >> secondCol; m->gobble(in);
+                       
+                       vector<string> names;
+                       m->splitAtComma(secondCol, names);
+                       
+                       nameMap[firstCol] = names;
+               }
+               in.close();
+                               
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "readNames");
+               exit(1);
+       }
+}
+
+/**********************************************************************************************************************/
+int SplitGroupCommand::splitFasta() { 
+       try {
+               
+               string filerootFasta =  outputDir + m->getRootName(m->getSimpleName(fastafile));
+               string filerootName =  outputDir + m->getRootName(m->getSimpleName(namefile));
+               ofstream* temp;
+               ofstream* temp2;
+               map<string, ofstream*> filehandles;
+               map<string, ofstream*>::iterator it3;
+
+               for (int i=0; i<Groups.size(); i++) {
+                       temp = new ofstream;
+                       filehandles[Groups[i]+"fasta"] = temp;
+                       m->openOutputFile(filerootFasta + Groups[i] + ".fasta", *(filehandles[Groups[i]+"fasta"]));
+                       outputNames.push_back(filerootFasta + Groups[i] + ".fasta");
+                       
+                       if (namefile != "") {
+                               temp2 = new ofstream;
+                               filehandles[Groups[i]+"name"] = temp2;
+                               m->openOutputFile(filerootName + Groups[i] + ".names", *(filehandles[Groups[i]+"name"]));
+                               outputNames.push_back(filerootName + Groups[i] + ".names");
+                       }
+               }
+                       
+               //open input file
+               ifstream in;
+               m->openInputFile(fastafile, in);
+       
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+               
+                       Sequence seq(in); m->gobble(in);
+                               
+                       if (seq.getName() != "") { 
+                               
+                               itNames = nameMap.find(seq.getName());
+                               
+                               if (itNames == nameMap.end()) {
+                                       if (namefile != "") {  m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();  }
+                                       else { //no names file given
+                                               string group = groupMap->getGroup(seq.getName());
+                                               
+                                               if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
+                                                       seq.printSequence(*(filehandles[group+"fasta"]));
+                                               }else if(group == "not found") {
+                                                       m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
+                                               }
+                                       }
+                               }else{
+                                       vector<string> names = itNames->second;
+                                       map<string, string> group2Names;
+                                       map<string, string>::iterator it;
+                                       
+                                       for (int i = 0; i < names.size(); i++) {  //build strings for new group names file, will select rep below
+                                               string group = groupMap->getGroup(names[i]);
+                                               
+                                               if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
+                                                       it = group2Names.find(group);
+                                                       if (it == group2Names.end()) {
+                                                               group2Names[group] = names[i] + ",";
+                                                       }else{
+                                                               group2Names[group] += names[i] + ",";
+                                                       }
+                                               }else if(group == "not found") {
+                                                       m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
+                                               }
+                                       }
+                               
+                                       for (map<string, string>::iterator itGroups = group2Names.begin(); itGroups != group2Names.end(); itGroups++) {
+                                               //edit names string, by grabbing the first guy as the rep and removing the last comma
+                                               string newNames = itGroups->second;
+                                               newNames = newNames.substr(0, newNames.length()-1); 
+                                               string repName = "";
+                                               
+                                               int pos = newNames.find_first_of(',');
+                                               if (pos == newNames.npos) { //only one sequence so it represents itself
+                                                       repName = newNames;
+                                               }else{
+                                                       repName = newNames.substr(0, pos);
+                                               }
+                                               
+                                               *(filehandles[itGroups->first+"name"]) << repName << '\t' << newNames << endl;
+                                               *(filehandles[itGroups->first+"fasta"]) << ">" << repName << endl << seq.getAligned() << endl;
+                                       }
+                               }
+                       }
+               }
+                       
+               in.close();
+                       
+               for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
+                       (*(filehandles[it3->first])).close();
+                       delete it3->second;
+               }
+                               
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "SplitGroupCommand", "splitFasta");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+
diff --git a/splitgroupscommand.h b/splitgroupscommand.h
new file mode 100644 (file)
index 0000000..f6e8760
--- /dev/null
@@ -0,0 +1,51 @@
+#ifndef SPLITGROUPSCOMMAND_H
+#define SPLITGROUPSCOMMAND_H
+
+/*
+ *  splitgroupscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 9/20/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+/* split.groups - given a group file, split sequences and names files in to separate files *.group1.fasta and .group1.names. */
+
+
+#include "command.hpp"
+#include "groupmap.h"
+#include "sequence.hpp"
+
+/***************************************************************************************/
+
+class SplitGroupCommand : public Command {
+       
+public:
+       SplitGroupCommand(string);      
+       ~SplitGroupCommand();
+       int execute();  
+       void help();
+
+       
+private:
+       int readNames(); 
+       int splitFasta(); 
+       
+       vector<string> outputNames;
+       map<string, vector<string> > nameMap;
+       map<string, vector<string> >::iterator itNames;
+       GroupMap* groupMap;
+       
+       string outputDir, namefile, groupfile, groups, fastafile;
+       vector<string> Groups;
+       bool abort;
+};
+
+/***************************************************************************************/
+
+#endif
+
+
+