]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed bug with aligner and longest base. added deunique.seqs command.
authorwestcott <westcott>
Tue, 19 Oct 2010 16:51:27 +0000 (16:51 +0000)
committerwestcott <westcott>
Tue, 19 Oct 2010 16:51:27 +0000 (16:51 +0000)
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
alignmentdb.cpp
commandfactory.cpp
deuniqueseqscommand.cpp [new file with mode: 0644]
deuniqueseqscommand.h [new file with mode: 0644]
mothur
nast.cpp
needlemanoverlap.cpp
pipelinepdscommand.cpp

index 5e84c7eabc213ba337255d55f61034df9e7384fa..18269cf4190351debefa4353c9461f8285006d38 100644 (file)
@@ -56,6 +56,8 @@
                A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterfragmentscommand.h; sourceTree = "<group>"; };
                A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfragmentscommand.cpp; sourceTree = "<group>"; };
                A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = "<group>"; };
+               A76714DD126DE45A003F359A /* deuniqueseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniqueseqscommand.h; sourceTree = "<group>"; };
+               A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniqueseqscommand.cpp; sourceTree = "<group>"; };
                A768D95D1248FEAA008AB1D0 /* mothur */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.executable"; path = mothur; sourceTree = "<group>"; };
                A76AAD02117F322B003D8DA1 /* phylosummary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylosummary.h; sourceTree = "<group>"; };
                A76AAD03117F322B003D8DA1 /* phylosummary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = phylosummary.cpp; sourceTree = "<group>"; };
                                A7DA203A113FECD400BF472F /* deconvolutecommand.cpp */,
                                A7DA203F113FECD400BF472F /* distancecommand.h */,
                                A7DA203E113FECD400BF472F /* distancecommand.cpp */,
+                               A76714DD126DE45A003F359A /* deuniqueseqscommand.h */,
+                               A76714DE126DE45A003F359A /* deuniqueseqscommand.cpp */,
                                A7DA2050113FECD400BF472F /* filterseqscommand.h */,
                                A7DA204F113FECD400BF472F /* filterseqscommand.cpp */,
                                A7DA205B113FECD400BF472F /* getgroupcommand.h */,
index b4af28a2990a44d69527d4c5cb7e5984d8b73033..63a117d5064cc8178051017173bacdfd80cb6b3a 100644 (file)
@@ -562,10 +562,11 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                                                
                                Sequence temp = templateDB->findClosestSequence(candidateSeq);
                                Sequence* templateSeq = &temp;
-                               
+                       
                                float searchScore = templateDB->getSearchScore();
                                                                
                                Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+               
                                Sequence* copy;
                                
                                Nast* nast2;
@@ -580,6 +581,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                        string wasBetter =  "";
                                        //if the user wants you to try the reverse
                                        if (flip) {
+                               
                                                //get reverse compliment
                                                copy = new Sequence(candidateSeq->getName(), originalUnaligned);
                                                copy->reverseComplement();
index e2b1dc197eb8d3399156a3ca752bff5da267101f..5a3027229bb8e91bdacfa25dea566a5273ca9d9d 100644 (file)
@@ -74,7 +74,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                                if (temp.getName() != "") {
                                        templateSequences.push_back(temp);
                                        //save longest base
-                                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
+                                       if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
                                }
                        }
                        
@@ -94,11 +94,10 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                        if (temp.getName() != "") {
                                templateSequences.push_back(temp);
                                //save longest base
-                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length()+1; }
+                               if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
                        }
                }
                fastaFile.close();
-               
        #endif
        
                numSeqs = templateSequences.size();
index 917ab96e69afa7cf7ad76d2cf85bbb79401e8db4..a4d6506df3128579158f2f1ff5ee99f67bfc9bc2 100644 (file)
@@ -93,6 +93,8 @@
 #include "removelineagecommand.h"
 #include "parsefastaqcommand.h"
 #include "pipelinepdscommand.h"
+#include "deuniqueseqscommand.h"
+
 
 /*******************************************************/
 
@@ -191,6 +193,7 @@ CommandFactory::CommandFactory(){
        commands["get.lineage"]                 = "get.lineage";
        commands["remove.lineage"]              = "remove.lineage";
        commands["fastq.info"]                  = "fastq.info";
+       commands["deunique.seqs"]               = "deunique.seqs";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
        commands["dist.seqs"]                   = "MPIEnabled";
@@ -334,6 +337,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "remove.lineage")                {       command = new RemoveLineageCommand(optionString);                       }
                else if(commandName == "fastq.info")                    {       command = new ParseFastaQCommand(optionString);                         }
                else if(commandName == "pipeline.pds")                  {       command = new PipelineCommand(optionString);                            }
+               else if(commandName == "deunique.seqs")                 {       command = new DeUniqueSeqsCommand(optionString);                        }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -445,6 +449,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "get.lineage")                   {       pipecommand = new GetLineageCommand(optionString);                              }
                else if(commandName == "remove.lineage")                {       pipecommand = new RemoveLineageCommand(optionString);                   }
                else if(commandName == "fastq.info")                    {       pipecommand = new ParseFastaQCommand(optionString);                             }
+               else if(commandName == "deunique.seqs")                 {       pipecommand = new DeUniqueSeqsCommand(optionString);                    }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -544,6 +549,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "get.lineage")                   {       shellcommand = new GetLineageCommand();                         }
                else if(commandName == "remove.lineage")                {       shellcommand = new RemoveLineageCommand();                      }
                else if(commandName == "fastq.info")                    {       shellcommand = new ParseFastaQCommand();                        }
+               else if(commandName == "deunique.seqs")                 {       shellcommand = new DeUniqueSeqsCommand();                       }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
diff --git a/deuniqueseqscommand.cpp b/deuniqueseqscommand.cpp
new file mode 100644 (file)
index 0000000..a4395a7
--- /dev/null
@@ -0,0 +1,260 @@
+/*
+ *  deuniqueseqscommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 10/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "deuniqueseqscommand.h"
+#include "sequence.hpp"
+
+//**********************************************************************************************************************
+vector<string> DeUniqueSeqsCommand::getValidParameters(){      
+       try {
+               string Array[] =  {"fasta", "name","outputdir","inputdir"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+DeUniqueSeqsCommand::DeUniqueSeqsCommand(){    
+       try {
+               //initialize outputTypes
+               vector<string> tempOutNames;
+               outputTypes["fasta"] = tempOutNames;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "DeconvoluteCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> DeUniqueSeqsCommand::getRequiredParameters(){   
+       try {
+               string Array[] =  {"fasta","name"};
+               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+vector<string> DeUniqueSeqsCommand::getRequiredFiles(){        
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
+/**************************************************************************************/
+DeUniqueSeqsCommand::DeUniqueSeqsCommand(string option)  {     
+       try {
+               abort = false;
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; }
+               
+               else {
+                       //valid paramters for this command
+                       string Array[] =  {"fasta", "name","outputdir","inputdir"};
+                       vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+                       
+                       OptionParser parser(option);
+                       map<string,string> parameters = parser.getParameters();
+                       
+                       ValidParameters validParameter;
+                       map<string, string>::iterator it;
+               
+                       //check to make sure all parameters are valid for command
+                       for (it = parameters.begin(); it != parameters.end(); it++) { 
+                               if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
+                       }
+                       
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+               
+                       //if the user changes the input directory command factory will send this info to us in the output parameter 
+                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       if (inputDir == "not found"){   inputDir = "";          }
+                       else {
+                               string path;
+                               it = parameters.find("fasta");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
+                               }
+                               
+                               it = parameters.find("name");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["name"] = inputDir + it->second;             }
+                               }
+                       }
+
+                       
+                       //check for required parameters
+                       fastaFile = validParameter.validFile(parameters, "fasta", true);
+                       if (fastaFile == "not open") { abort = true; }
+                       else if (fastaFile == "not found") { fastaFile = ""; m->mothurOut("fasta is a required parameter for the deunique.seqs command."); m->mothurOutEndLine(); abort = true;  }      
+                       
+                       //if the user changes the output directory command factory will send this info to us in the output parameter 
+                       outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
+                               outputDir = ""; 
+                               outputDir += m->hasPath(fastaFile); //if user entered a file with a path then preserve it       
+                       }
+                       
+                       nameFile = validParameter.validFile(parameters, "name", true);
+                       if (nameFile == "not open") { abort = true; }
+                       else if (nameFile == "not found"){      nameFile = "";  m->mothurOut("name is a required parameter for the deunique.seqs command."); m->mothurOutEndLine(); abort = true;  }
+               }
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "DeUniqueSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+void DeUniqueSeqsCommand::help(){
+       try {
+               m->mothurOut("The deunique.seqs command reads a fastafile and namefile, and creates a fastafile containing all the sequences.\n");
+               m->mothurOut("The deunique.seqs command parameters are fasta and name, both are required.\n");
+               m->mothurOut("The deunique.seqs command should be in the following format: \n");
+               m->mothurOut("deunique.seqs(fasta=yourFastaFile, name=yourNameFile) \n");       
+               m->mothurOut("Example deunique.seqs(fasta=abrecovery.unique.fasta, name=abrecovery.names).\n");
+               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "help");
+               exit(1);
+       }
+}
+
+/**************************************************************************************/
+int DeUniqueSeqsCommand::execute() {   
+       try {
+               
+               if (abort == true) { return 0; }
+
+               //prepare filenames and open files
+               ofstream out;
+               string outFastaFile = m->getRootName(m->getSimpleName(fastaFile));
+               int pos = outFastaFile.find("unique");
+               if (pos != string::npos) {
+                       outFastaFile = outputDir + outFastaFile.substr(0, pos) + "redundant" + m->getExtension(fastaFile);
+               }else{
+                       outFastaFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "redundant" + m->getExtension(fastaFile);
+               }
+               m->openOutputFile(outFastaFile, out);
+               
+               readNamesFile();
+               if (m->control_pressed) {  out.close(); outputTypes.clear(); remove(outFastaFile.c_str()); return 0; }
+               
+               ifstream in;
+               m->openInputFile(fastaFile, in);
+               
+               while (!in.eof()) {
+               
+                       if (m->control_pressed) { in.close(); out.close(); outputTypes.clear(); remove(outFastaFile.c_str()); return 0; }
+                       
+                       Sequence seq(in); m->gobble(in);
+                       
+                       if (seq.getName() != "") {
+                               
+                               //look for sequence name in nameMap
+                               map<string, string>::iterator it = nameMap.find(seq.getName());
+                               
+                               if (it == nameMap.end()) {      m->mothurOut("[ERROR]: Your namefile does not contain " + seq.getName() + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+                               else {
+                                       vector<string> names;
+                                       m->splitAtComma(it->second, names);
+                                       
+                                       //output sequences
+                                       for (int i = 0; i < names.size(); i++) {
+                                               out << ">" << names[i] << endl;
+                                               out << seq.getAligned() << endl;
+                                       }
+                                       
+                                       //remove seq from name map so we can check for seqs in namefile not in fastafile later
+                                       nameMap.erase(it);
+                               }
+                       }
+               }
+               in.close();
+               out.close(); 
+               
+               if (nameMap.size() != 0) { //then there are names in the namefile not in the fastafile
+                       for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {  
+                               m->mothurOut(it->first + " is not in your fasta file, but is in your name file. Please correct."); m->mothurOutEndLine();
+                       }
+               }
+                               
+               if (m->control_pressed) { outputTypes.clear(); remove(outFastaFile.c_str()); return 0; }
+               
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               m->mothurOut(outFastaFile); m->mothurOutEndLine();      
+               outputNames.push_back(outFastaFile);  outputTypes["fasta"].push_back(outFastaFile);  
+               m->mothurOutEndLine();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int DeUniqueSeqsCommand::readNamesFile() {
+       try {
+               
+               ifstream inNames;
+               m->openInputFile(nameFile, inNames);
+               
+               string name, names;
+               map<string, string>::iterator it;
+       
+               while(inNames){
+                       
+                       if(m->control_pressed) { break; }
+                       
+                       inNames >> name;        m->gobble(inNames);             
+                       inNames >> names;               
+                       
+                       it = nameMap.find(name);
+                       
+                       if (it == nameMap.end()) {      nameMap[name] = names; }
+                       else { m->mothurOut("[ERROR]: Your namefile already contains " + name + ", aborting."); m->mothurOutEndLine(); m->control_pressed = true; }
+                                       
+                       m->gobble(inNames);
+               }
+               inNames.close();
+               
+               return 0;
+
+       }
+       catch(exception& e) {
+               m->errorOut(e, "DeUniqueSeqsCommand", "readNamesFile");
+               exit(1);
+       }
+}
+
+/**************************************************************************************/
diff --git a/deuniqueseqscommand.h b/deuniqueseqscommand.h
new file mode 100644 (file)
index 0000000..13176a8
--- /dev/null
@@ -0,0 +1,43 @@
+#ifndef DEUNIQUESEQSCOMMAND_H
+#define DEUNIQUESEQSCOMMAND_H
+/*
+ *  deuniqueseqscommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 10/19/10.
+ *  Copyright 2010 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+/* This command is the reverse of unique.seqs */ 
+
+class DeUniqueSeqsCommand : public Command {
+
+public:
+       DeUniqueSeqsCommand(string);
+       DeUniqueSeqsCommand();
+       ~DeUniqueSeqsCommand() {}
+       vector<string> getRequiredParameters();
+       vector<string> getValidParameters();
+       vector<string> getRequiredFiles();
+       map<string, vector<string> > getOutputFiles() { return outputTypes; }
+       int execute();
+       void help();    
+       
+private:
+
+       string fastaFile, nameFile, outputDir;
+       vector<string> outputNames;
+       map<string, vector<string> > outputTypes;
+       bool abort;
+       
+       map<string, string> nameMap;
+       
+       int readNamesFile();
+       
+};
+
+#endif
+
diff --git a/mothur b/mothur
index 4499d7e5174e5842d39e0eab63fea42495397922..6480cd46241419dfd2fdc3f244c93bb2b6ce456a 100755 (executable)
Binary files a/mothur and b/mothur differ
index b2887523532c6b6bcaef1ca61b2c748a9ad77d14..28cc659e9c5a6090bcd6cb5edecdc7cf9d6c9ec0 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -25,8 +25,10 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
        try {
                m = MothurOut::getInstance();
                maxInsertLength = 0;
+       
                pairwiseAlignSeqs();    //      This is part A in Fig. 2 of DeSantis et al.
                regapSequences();               //      This is parts B-F in Fig. 2 of DeSantis et al.
+               
        }
        catch(exception& e) {
                m->errorOut(e, "Nast", "Nast");
index d13925e3cff9f54c61df9435b7f9cb36f9468e34..1239beb1a2a9f61d3f8c2cec178161fb36a7b865 100644 (file)
@@ -51,18 +51,21 @@ NeedlemanOverlap::~NeedlemanOverlap(){      /*      do nothing      */      }
 
 void NeedlemanOverlap::align(string A, string B){
        try {
+       
                seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
                seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
 
                if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine();  }
                
                for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1 
+               
                        for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
+       
                                //      number of errors
                                float diagonal;
                                if(seqB[i] == seqA[j])  {       diagonal = alignment[i-1][j-1].cValue + match;          }
                                else                                    {       diagonal = alignment[i-1][j-1].cValue + mismatch;       }
-                               
+                       
                                float up        = alignment[i-1][j].cValue + gap;
                                float left      = alignment[i][j-1].cValue + gap;
                                
index a747aa752389ccb3dd0339c0b33df5d1c8bd37ef..0217d392a1e0454be1319b13e9f750d3a1e1d1fc 100644 (file)
@@ -590,13 +590,13 @@ void PipelineCommand::createPatsPipeline(){
                
                //sff.info command
                string thisCommand = "sffinfo(sff=" + sffFile + ")";
-               commands.push_back(thisCommand);
+               //commands.push_back(thisCommand);
                
                //trim.seqs command
                string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
                string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
-               thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
-               commands.push_back(thisCommand);
+               //thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
+               //commands.push_back(thisCommand);
                
                //unique.seqs
                string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";