]> git.donarmstrong.com Git - mothur.git/commitdiff
added group option to trim.seqs so you can provide your own groupfile instead of...
authorwestcott <westcott>
Mon, 22 Nov 2010 13:24:01 +0000 (13:24 +0000)
committerwestcott <westcott>
Mon, 22 Nov 2010 13:24:01 +0000 (13:24 +0000)
indicatorcommand.cpp
mothur
trimseqscommand.cpp
trimseqscommand.h

index 7f6dc21d21b1d3b0ebf3e68e91011e53549bd46d..4c9baee4697e8cabbe313627196b780d8f26fec3 100644 (file)
@@ -164,13 +164,14 @@ IndicatorCommand::IndicatorCommand(string option)  {
 
 void IndicatorCommand::help(){
        try {
-               /*m->mothurOut("The read.tree command must be run before you execute a unifrac.weighted, unifrac.unweighted. \n");
-               m->mothurOut("It also must be run before using the parsimony command, unless you are using the randomtree parameter.\n");
-               m->mothurOut("The read.tree command parameters are tree, group and name.\n");
-               m->mothurOut("The read.tree command should be in the following format: read.tree(tree=yourTreeFile, group=yourGroupFile).\n");
-               m->mothurOut("The tree and group parameters are both required, if no group file is given then one group is assumed.\n");
-               m->mothurOut("The name parameter allows you to enter a namefile.\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); */
+               m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n");
+               m->mothurOut("The new tree contains labels at each internal node.  The label is the OTU number of the indicator OTU.\n");
+               m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n");
+               m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree and label parameter are required as well as either shared or relabund.\n");
+               m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed.  The groups may be entered separated by dashes.\n");
+               m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n");
+               m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n\n"); 
        }
        catch(exception& e) {
                m->errorOut(e, "IndicatorCommand", "help");     
diff --git a/mothur b/mothur
index bb3b522fd6de1b23408e4a593490bca2db0337fc..ff1c87512ae671899e4be9c14a9f8710b2061016 100755 (executable)
Binary files a/mothur and b/mothur differ
index 8aacd9d7c78b767acbf7c00ab92f945dff11dc2d..a09e40246ef1c71594b7023410f35c7fbb52d22f 100644 (file)
@@ -13,7 +13,7 @@
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::getValidParameters(){  
        try {
-               string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
+               string Array[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "group","minlength", "maxlength", "qfile", 
                                                                        "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
                vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                return myArray;
@@ -74,7 +74,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string AlignArray[] =  {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
+                       string AlignArray[] =  {"fasta", "flip", "group","oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile", 
                                                                        "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
                        
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
@@ -124,6 +124,14 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
                                }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
                        }
 
                        
@@ -151,6 +159,11 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        else if(temp == "not open"){    abort = true;   } 
                        else                                    {       oligoFile = temp;               }
                        
+                       temp = validParameter.validFile(parameters, "group", true);
+                       if (temp == "not found"){       groupfile = "";         }
+                       else if(temp == "not open"){    abort = true;   } 
+                       else                                    {       groupfile = temp;               }
+                       
                        temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
                        convert(temp, maxAmbig);  
 
@@ -206,8 +219,13 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
                        convert(temp, processors); 
                        
-                       if(allFiles && oligoFile == ""){
-                               m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
+                       if ((oligoFile != "") && (groupfile != "")) {
+                               m->mothurOut("You given both a oligos file and a groupfile, only one is allowed."); m->mothurOutEndLine(); abort = true;
+                       }
+                                                                                               
+                       
+                       if(allFiles && (oligoFile == "") && (groupfile == "")){
+                               m->mothurOut("You selected allfiles, but didn't enter an oligos or group file.  Ignoring the allfiles request."); m->mothurOutEndLine();
                        }
                        if((qAverage != 0 && qThreshold != 0) && qFileName == ""){
                                m->mothurOut("You didn't provide a quality file name, quality criteria will be ignored."); m->mothurOutEndLine();
@@ -231,8 +249,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
 void TrimSeqsCommand::help(){
        try {
                m->mothurOut("The trim.seqs command reads a fastaFile and creates .....\n");
-               m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
+               m->mothurOut("The trim.seqs command parameters are fasta, flip, oligos, group, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim and allfiles.\n");
                m->mothurOut("The fasta parameter is required.\n");
+               m->mothurOut("The group parameter allows you to enter a group file for your fasta file.\n");
                m->mothurOut("The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n");
                m->mothurOut("The oligos parameter .... The default is "".\n");
                m->mothurOut("The maxambig parameter .... The default is -1.\n");
@@ -275,6 +294,8 @@ int TrimSeqsCommand::execute(){
                
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
+               vector<string> fastaFileNames;
+               vector<string> qualFileNames;
                
                string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
                outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
@@ -283,10 +304,35 @@ int TrimSeqsCommand::execute(){
                string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
                string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
                if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
-               string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
+               string groupFile = "";
+               if (groupfile == "") { groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups"; }
+               else{
+                       groupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "trim.groups";
+                       outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
+                       groupMap = new GroupMap(groupfile);
+                       groupMap->readMap();
+                       
+                       if(allFiles){
+                               for (int i = 0; i < groupMap->namesOfGroups.size(); i++) {
+                                       groupToIndex[groupMap->namesOfGroups[i]] = i;
+                                       groupVector.push_back(groupMap->namesOfGroups[i]);
+                                       fastaFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) +  groupMap->namesOfGroups[i] + ".fasta"));
+                                       
+                                       //we append later, so we want to clear file
+                                       ofstream outRemove;
+                                       m->openOutputFile(fastaFileNames[i], outRemove);
+                                       outRemove.close();
+                                       if(qFileName != ""){
+                                               qualFileNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) +  groupMap->namesOfGroups[i] + ".qual"));
+                                               ofstream outRemove2;
+                                               m->openOutputFile(qualFileNames[i], outRemove2);
+                                               outRemove2.close();
+                                       }
+                               }
+                       }
+                       comboStarts = fastaFileNames.size()-1;
+               }
                
-               vector<string> fastaFileNames;
-               vector<string> qualFileNames;
                if(oligoFile != ""){
                        outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
                        getOligos(fastaFileNames, qualFileNames);
@@ -384,7 +430,7 @@ int TrimSeqsCommand::execute(){
                                                                        
                                if (m->control_pressed) {  return 0; }
                #endif
-                                               
+                                       
                                                                                
                for(int i=0;i<fastaFileNames.size();i++){
                        if (m->isBlank(fastaFileNames[i])) { remove(fastaFileNames[i].c_str()); }
@@ -643,6 +689,32 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                        }
                                                }
                                        }
+                                       
+                                       if (groupfile != "") {
+                                               string thisGroup = groupMap->getGroup(currSeq.getName());
+                                               
+                                               if (thisGroup != "not found") {
+                                                       outGroups << currSeq.getName() << '\t' << thisGroup << endl;
+                                                       if (allFiles) {
+                                                               ofstream outTemp;
+                                                               m->openOutputFileAppend(fastaNames[groupToIndex[thisGroup]], outTemp);
+                                                               currSeq.printSequence(outTemp);
+                                                               outTemp.close();
+                                                               if(qFileName != ""){
+                                                                       ofstream outTemp2;
+                                                                       m->openOutputFileAppend(qualNames[groupToIndex[thisGroup]], outTemp2);
+                                                                       currQual.printQScores(outTemp2);
+                                                                       outTemp2.close();                                                       
+                                                               }
+                                                       }
+                                               }else{
+                                                       m->mothurOut(currSeq.getName() + " is not in your groupfile, adding to group XXX."); m->mothurOutEndLine();
+                                                       outGroups << currSeq.getName() << '\t' << "XXX" << endl;
+                                                       if (allFiles) {  
+                                                               m->mothurOut("[ERROR]: " + currSeq.getName() + " will not be added to any .group.fasta or .group.qual file."); m->mothurOutEndLine();
+                                                       }
+                                               }
+                                       }
                                }
                                else{
                                        currSeq.setName(currSeq.getName() + '|' + trashCode);
index 20b11a85ac05e57b453906475d73ab4fd2ecb2e0..2f8fbd7c499559bf65f9f5393982bd00dddfbcc3 100644 (file)
@@ -14,6 +14,7 @@
 #include "command.hpp"
 #include "sequence.hpp"
 #include "qualityscores.h"
+#include "groupmap.h"
 
 class TrimSeqsCommand : public Command {
 public:
@@ -28,7 +29,9 @@ public:
        void help();
        
 private:
-
+       
+       GroupMap* groupMap;
+       
        struct linePair {
                unsigned long int start;
                unsigned long int end;
@@ -47,7 +50,7 @@ private:
        map<string, vector<string> > outputTypes;
 
        bool abort;
-       string fastaFile, oligoFile, qFileName, outputDir;
+       string fastaFile, oligoFile, qFileName, groupfile, outputDir;
        
        bool flip, allFiles, qtrim;
        int numFPrimers, numRPrimers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, comboStarts;
@@ -59,6 +62,7 @@ private:
        vector<string> groupVector;
        map<string, int> primers;
        map<string, int> combos;
+       map<string, int> groupToIndex;
        
        vector<int> processIDS;   //processid
        vector<linePair*> lines;