]> git.donarmstrong.com Git - mothur.git/commitdiff
added create.database command. fixed help in get.sharedseqs. added new constructor...
authorSarah Westcott <mothur.westcott@gmail.com>
Mon, 2 Apr 2012 14:14:38 +0000 (10:14 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Mon, 2 Apr 2012 14:14:38 +0000 (10:14 -0400)
commandfactory.cpp
createdatabasecommand.cpp
createdatabasecommand.h
getsharedotucommand.cpp
mothurout.cpp
sequence.cpp
sequence.hpp
sffinfocommand.cpp
trimflowscommand.cpp

index 49dcd2cb6f42edfc7208738966a119147192b415..b61c2d2dc820c7cf05b7f82d3983017f2d6d4c55 100644 (file)
 #include "classifytreecommand.h"
 #include "cooccurrencecommand.h"
 #include "pcrseqscommand.h"
+#include "createdatabasecommand.h"
 
 /*******************************************************/
 
@@ -283,6 +284,7 @@ CommandFactory::CommandFactory(){
     commands["classify.tree"]       = "classify.tree";
     commands["cooccurrence"]        = "cooccurrence";
     commands["pcr.seqs"]            = "pcr.seqs";
+    commands["create.database"]     = "create.database";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -449,6 +451,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "classify.tree")         {      command = new ClassifyTreeCommand(optionString);            }
         else if(commandName == "cooccurrence")          {      command = new CooccurrenceCommand(optionString);            }
         else if(commandName == "pcr.seqs")              {      command = new PcrSeqsCommand(optionString);                 }
+        else if(commandName == "create.database")       {      command = new CreateDatabaseCommand(optionString);          }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -598,6 +601,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "classify.tree")         {      pipecommand = new ClassifyTreeCommand(optionString);            }
         else if(commandName == "cooccurrence")          {      pipecommand = new CooccurrenceCommand(optionString);            }
         else if(commandName == "pcr.seqs")              {      pipecommand = new PcrSeqsCommand(optionString);                 }
+        else if(commandName == "create.database")       {      pipecommand = new CreateDatabaseCommand(optionString);          }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -735,6 +739,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "classify.tree")         {      shellcommand = new ClassifyTreeCommand();           }
         else if(commandName == "cooccurrence")          {      shellcommand = new CooccurrenceCommand();           }
         else if(commandName == "pcr.seqs")              {      shellcommand = new PcrSeqsCommand();                }
+        else if(commandName == "create.database")       {      shellcommand = new CreateDatabaseCommand();         }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 9d90e74509ab1bc4f555686fb166caeeb52e8195..1da67e6d8694096da74ce8f855b27cf3b96d2f89 100644 (file)
@@ -7,7 +7,6 @@
 //
 
 #include "createdatabasecommand.h"
-#include "sequence.hpp"
 #include "inputdata.h"
 
 //**********************************************************************************************************************
@@ -37,7 +36,12 @@ string CreateDatabaseCommand::getHelpString(){
                string helpString = "";
                helpString += "The create.database command reads a listfile, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
                helpString += "The create.database command parameters are repfasta, list, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
-        
+        helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
+        helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
+        helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
+        helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
+        helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
+        helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
         helpString += "The create.database command should be in the following format: \n";
                helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";       
                helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
@@ -50,5 +54,449 @@ string CreateDatabaseCommand::getHelpString(){
        }
 }
 //**********************************************************************************************************************
+CreateDatabaseCommand::CreateDatabaseCommand(){        
+       try {
+               abort = true; calledHelp = true; 
+               setParameters();
+               vector<string> tempOutNames;
+               outputTypes["database"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
+       try{
+               abort = false; calledHelp = false;   
+        
+               //allow user to run help
+               if (option == "help") { 
+                       help(); abort = true; calledHelp = true;
+               }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
+               else {
+                       vector<string> myArray = setParameters();
+                       
+                       OptionParser parser(option);
+                       map<string, string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+            
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["database"] = tempOutNames;
+            
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("list");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["list"] = inputDir + it->second;             }
+                               }
+                               
+                               it = parameters.find("repname");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["repname"] = inputDir + it->second;          }
+                               }
+                               
+                               it = parameters.find("contaxonomy");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
+                               }
+                               
+                               it = parameters.find("repfasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["repfasta"] = inputDir + it->second;         }
+                               }
+                               
+                               it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                       }
+            
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
+                       
+                       //check for required parameters
+                       listfile = validParameter.validFile(parameters, "list", true);
+                       if (listfile == "not found") {                          
+                               //if there is a current list file, use it
+                               listfile = m->getListFile(); 
+                               if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
+                               else {  m->mothurOut("You have no current listfile and the list parameter is required."); m->mothurOutEndLine(); abort = true; }
+                       }
+                       else if (listfile == "not open") { abort = true; }      
+                       else { m->setListFile(listfile); }
+                       
+                       contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
+                       if (contaxonomyfile == "not found") {  //if there is a current list file, use it
+               contaxonomyfile = "";  m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
+                       }
+                       else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
+
+            repfastafile = validParameter.validFile(parameters, "repfasta", true);
+                       if (repfastafile == "not found") {  //if there is a current list file, use it
+                repfastafile = "";  m->mothurOut("The repfasta parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
+                       }
+                       else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
+
+            repnamesfile = validParameter.validFile(parameters, "repname", true);
+                       if (repnamesfile == "not found") {  //if there is a current list file, use it
+                repnamesfile = "";  m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
+                       }
+                       else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
+
+                       groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") { groupfile = ""; abort = true; }  
+                       else if (groupfile == "not found") { groupfile = ""; }
+                       else { m->setGroupFile(groupfile); }
+                       
+                       //check for optional parameter and set defaults
+                       // ...at some point should added some additional type checking...
+            label = validParameter.validFile(parameters, "label", false);                      
+                       if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your listfile.\n");}
+        }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int CreateDatabaseCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+        
+        //taxonomies holds the taxonomy info for each Otu
+        //classifyOtuSizes holds the size info of each Otu to help with error checking
+        vector<string> taxonomies;
+        vector<int> classifyOtuSizes = readTax(taxonomies);
+        
+        if (m->control_pressed) { return 0; }
+        
+        vector<Sequence> seqs;
+        vector<int> repOtusSizes = readFasta(seqs);
+        
+        if (m->control_pressed) { return 0; }
+        
+        //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
+        map<string, string> repNames;
+        int numUniqueNamesFile = readNames(repNames);
+        
+        //are there the same number of otus in the fasta and name files
+        if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
+        
+        if (m->control_pressed) { return 0; }
+        
+        //are there the same number of OTUs in the tax and fasta file
+        if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
+
+        if (m->control_pressed) { return 0; }
+        
+        //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
+        for (int i = 0; i < classifyOtuSizes.size(); i++) {
+            if (classifyOtuSizes[i] != repOtusSizes[i]) {
+               m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ".  These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; 
+            }
+        }
+        
+        if (m->control_pressed) { return 0; }
+        
+        //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
+        ListVector* list = getList();
+        
+        if (m->control_pressed) { delete list; return 0; }
+        
+        GroupMap* groupmap = NULL;
+        if (groupfile != "") {
+                       groupmap = new GroupMap(groupfile);
+                       groupmap->readMap();
+               }
+        
+        if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
+        
+        if (outputDir == "") { outputDir += m->hasPath(listfile); }
+        string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + "database";
+        outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
+        
+        ofstream out;
+        m->openOutputFile(outputFileName, out);
+        
+        string header = "OTUNumber\tAbundance\t";
+        if (groupfile != "") { 
+            header = "OTUNumber\t";
+            for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
+        }
+        header += "repSeqName\trepSeq\tOTUConTaxonomy";
+        out << header << endl;
+        
+        for (int i = 0; i < list->getNumBins(); i++) {
+            
+            if (m->control_pressed) { break; }
+            
+            out << (i+1) << '\t';
+            
+            vector<string> binNames;
+            string bin = list->get(i);
+            
+            map<string, string>::iterator it = repNames.find(bin);
+            if (it == repNames.end()) {
+                m->mothurOut("[ERROR: OTU " + toString(i+1) + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+            }
+            
+            m->splitAtComma(bin, binNames);
+            
+            //sanity check
+            if (binNames.size() != classifyOtuSizes[i]) {
+                 m->mothurOut("[ERROR: OTU " + toString(i+1) + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
+            }
+            
+            //output abundances
+            if (groupfile != "") {
+                string groupAbunds = "";
+                map<string, int> counts;
+                //initialize counts to 0
+                for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
+                
+                //find abundances by group
+                bool error = false;
+                for (int j = 0; j < binNames.size(); j++) {
+                    string group = groupmap->getGroup(binNames[j]);
+                    if (group == "not found") {
+                        m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
+                        error = true;
+                    }else { counts[group]++; }
+                }
+                
+                //output counts
+                for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
+                
+                if (error) { m->control_pressed = true; }
+            }else { out << binNames.size() << '\t'; }
+            
+            //output repSeq
+            out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+        }
+        out.close();
+        
+        delete list;
+        if (groupfile != "") { delete groupmap; }
+        
+        if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
+        
+        m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outputFileName); m->mothurOutEndLine();    
+               m->mothurOutEndLine();
+        
+        return 0;
+        
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies){
+       try {
+               
+        vector<int> sizes; 
+        
+        ifstream in;
+        m->openInputFile(contaxonomyfile, in);
+        
+        //read headers
+        m->getline(in);
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string otu = ""; string tax = "unknown";
+            int size = 0;
+            
+            in >> otu >> size >> tax; m->gobble(in);
+            
+            sizes.push_back(size);
+            taxonomies.push_back(tax);
+        }
+        in.close();
+        
+        return sizes;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readTax");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
+       try {
+               
+        vector<int> sizes; 
+        
+        ifstream in;
+        m->openInputFile(repfastafile, in);
+        
+        while (!in.eof()) {
+            
+            if (m->control_pressed) { break; }
+            
+            string binInfo;
+            Sequence seq(in, binInfo, true);  m->gobble(in);
+            
+            //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
+            vector<string> info;
+            m->splitAtChar(binInfo, info, '|');
+            if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;}
+            
+            int size = 0;
+            m->mothurConvert(info[1], size);
+            
+            sizes.push_back(size);
+            seqs.push_back(seq);
+        }
+        in.close();
+        
+        return sizes;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readFasta");
+               exit(1);
+       }
+}
+/**********************************************************************************************************************/
+int CreateDatabaseCommand::readNames(map<string, string>& nameMap) { 
+       try {
+               
+               //open input file
+               ifstream in;
+               m->openInputFile(repnamesfile, in);
+               
+               while (!in.eof()) {
+                       if (m->control_pressed) { break; }
+                       
+                       string firstCol, secondCol;
+                       in >> firstCol >> secondCol; m->gobble(in);
+                       
+                       nameMap[secondCol] = firstCol;
+               }
+               in.close();
+               
+               return nameMap.size();
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "readNames");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+ListVector* CreateDatabaseCommand::getList(){
+       try {
+               InputData* input = new InputData(listfile, "list");
+               ListVector* list = input->getListVector();
+               string lastLabel = list->getLabel();
+               
+               if (label == "") { label = lastLabel; delete input; return list; }
+               
+               //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+               set<string> labels; labels.insert(label);
+               set<string> processedLabels;
+               set<string> userLabels = labels;
+               
+               //as long as you are not at the end of the file or done wih the lines you want
+               while((list != NULL) && (userLabels.size() != 0)) {
+                       if (m->control_pressed) {  delete input; return list;  }
+                       
+                       if(labels.count(list->getLabel()) == 1){
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               break;
+                       }
+                       
+                       if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+                               string saveLabel = list->getLabel();
+                               
+                               delete list;
+                               list = input->getListVector(lastLabel);
+                               
+                               processedLabels.insert(list->getLabel());
+                               userLabels.erase(list->getLabel());
+                               
+                               //restore real lastlabel to save below
+                               list->setLabel(saveLabel);
+                               break;
+                       }
+                       
+                       lastLabel = list->getLabel();                   
+                       
+                       //get next line to process
+                       //prevent memory leak
+                       delete list; 
+                       list = input->getListVector();
+               }
+               
+               
+               if (m->control_pressed) { delete input; return list;  }
+               
+               //output error messages about any remaining user labels
+               set<string>::iterator it;
+               bool needToRun = false;
+               for (it = userLabels.begin(); it != userLabels.end(); it++) {  
+                       m->mothurOut("Your file does not include the label " + *it); 
+                       if (processedLabels.count(lastLabel) != 1) {
+                               m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+                               needToRun = true;
+                       }else {
+                               m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+                       }
+               }
+               
+               //run last label if you need to
+               if (needToRun == true)  {
+                       delete list;
+                       list = input->getListVector(lastLabel);
+               }       
+               
+               delete input;
+
+        return list;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "CreateDatabaseCommand", "getList");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 
index 9fc84d7740ebfa4098dce2ee33dc6fb902c1c003..643ff6ec9fc3691c4a9de5f146cbfb1fdcaff354 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "command.hpp"
 #include "listvector.hpp"
+#include "sequence.hpp"
 
 class CreateDatabaseCommand : public Command {
 public:
@@ -26,20 +27,20 @@ public:
        string getDescription()         { return "creates database file that includes, abundances across groups, representative sequences, and taxonomy for each OTU"; }
     
        
-       int execute() {}
+       int execute(); 
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
        
        bool abort;
-       string listfile, groupfile, repfastafile, repnamesfile, constaxonomyfile, label, outputDir;
+       string listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
        
        vector<string> outputNames;
                
-       int readFasta();
-       int readNames();
-    int readTax();
-       int processList(ListVector*&);
+       vector<int> readFasta(vector<Sequence>&);
+    vector<int> readTax(vector<string>&);
+    int readNames(map<string, string>&); 
+       ListVector* getList();
        
 };
 
index 2312649d1cff739956638ba2ad8788949669f1d5..02d8413675bade0cf79f8781987cc45078b74db2 100644 (file)
@@ -46,8 +46,8 @@ string GetSharedOTUCommand::getHelpString(){
                helpString += "The output parameter allows you to output the list of names without the group and bin number added. \n";
                helpString += "With this option you can use the names file as an input in get.seqs and remove.seqs commands. To do this enter output=accnos. \n";
                helpString += "The get.sharedseqs command outputs a .names file for each distance level containing a list of sequences in the OTUs shared by the groups specified.\n";
-               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(label=yourLabels, groups=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
-               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
+               helpString += "The get.sharedseqs command should be in the following format: get.sharedseqs(list=yourListFile, group=yourGroupFile, label=yourLabels, unique=yourGroups, fasta=yourFastafile, output=yourOutput).\n";
+               helpString += "Example get.sharedseqs(list=amazon.fn.list, label=unique-0.01, group= amazon.groups, unique=forest-pasture, fasta=amazon.fasta, output=accnos).\n";
                helpString += "The output to the screen is the distance and the number of otus at that distance for the groups you specified.\n";
                helpString += "The default value for label is all labels in your inputfile. The default for groups is all groups in your file.\n";
                helpString += "Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabel).\n";
index c89d580d64f6c69a6afedada448143ae114453a3..4df5f96eb6086e8d2e8e96e3f448948540a1c8b4 100644 (file)
@@ -1351,7 +1351,7 @@ int MothurOut::readNames(string namefile, map<string, string>& nameMap) {
                }
                in.close();
                
-               return 0;
+               return nameMap.size();
                
        }
        catch(exception& e) {
@@ -1380,7 +1380,7 @@ int MothurOut::readNames(string namefile, map<string, vector<string> >& nameMap)
                }
                in.close();
                
-               return 0;
+               return nameMap.size();
                
        }
        catch(exception& e) {
index 9cdbfb91cf6ba148dbf4e2f5b4252b33604c906c..d877f4b93f33d8123ff6a1a9f7f839dc680fcf61 100644 (file)
@@ -191,6 +191,59 @@ Sequence::Sequence(ifstream& fastaFile){
 }
 //********************************************************************************************************************
 //this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
+Sequence::Sequence(ifstream& fastaFile, string& extraInfo, bool getInfo){
+       try {
+               m = MothurOut::getInstance();
+               initialize();
+               fastaFile >> name;
+        extraInfo = "";
+               
+               if (name.length() != 0) { 
+            
+                       name = name.substr(1); 
+                       
+                       string sequence;
+            
+                       //read comments
+                       while ((name[0] == '#') && fastaFile) { 
+                               while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               sequence = getCommentString(fastaFile);
+                               
+                               if (fastaFile) {  
+                                       fastaFile >> name;  
+                                       name = name.substr(1);  
+                               }else { 
+                                       name = "";
+                                       break;
+                               }
+                       }
+                       
+                       //read info after sequence name
+                       while (!fastaFile.eof())        {       
+                char c = fastaFile.get(); 
+                if (c == 10 || c == 13){  break;       }       
+                extraInfo += c;
+            } 
+                       
+                       int numAmbig = 0;
+                       sequence = getSequenceString(fastaFile, numAmbig);
+                       
+                       setAligned(sequence);   
+                       //setUnaligned removes any gap characters for us                                                
+                       setUnaligned(sequence); 
+                       
+                       if ((numAmbig / (float) numBases) > 0.25) { m->mothurOut("[WARNING]: We found more than 25% of the bases in sequence " + name + " to be ambiguous. Mothur is not setup to process protein sequences."); m->mothurOutEndLine(); }
+                       
+               }else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); }
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Sequence", "Sequence");
+               exit(1);
+       }                                                       
+}
+//********************************************************************************************************************
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
 Sequence::Sequence(ifstream& fastaFile, string JustUnaligned){
        try {
                m = MothurOut::getInstance();
index 0a62e0c1ff6b60f20d58ed260847fb39eec44659..db4c4f32b9992a27f63e15908d881a77d5507980 100644 (file)
@@ -27,6 +27,7 @@ public:
        Sequence();
        Sequence(string, string);
        Sequence(ifstream&);
+    Sequence(ifstream&, string&, bool);
        Sequence(istringstream&);
        //these constructors just set the unaligned string to save space
        Sequence(string, string, string);  
index 40dd09b9820b9689ab4d30ba9f5666ceb2e9573b..20caead668196fc75d10de1275956b7955bdd3b6 100644 (file)
@@ -361,6 +361,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos){
 
                ofstream outSfftxt, outFasta, outQual, outFlow;
                string outFastaFileName, outQualFileName;
+        string rootName = outputDir + m->getRootName(m->getSimpleName(input));
+        if(rootName.find_last_of(".") == rootName.npos){ rootName += "."; }
+        
                string sfftxtFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "sff.txt";
                string outFlowFileName = outputDir + m->getRootName(m->getSimpleName(input)) + "flow";
                if (trim) {
index 346902c38b36a2b7565493810e503467ada284dd..0557c71bda3453068175fe3d405ff87972df80e1 100644 (file)
@@ -409,7 +409,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                success = 0;
                                trashCode += 'l';
                        }
-                       cout << currSeq.getName() << endl << currSeq.getUnaligned() << endl;
+                       
                        int primerIndex = 0;
                        int barcodeIndex = 0;