]> git.donarmstrong.com Git - mothur.git/commitdiff
added save parameter to align.seqs, chimera commands, classify.seqs, and seq.error...
authorwestcott <westcott>
Wed, 6 Jul 2011 14:43:57 +0000 (14:43 +0000)
committerwestcott <westcott>
Wed, 6 Jul 2011 14:43:57 +0000 (14:43 +0000)
33 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
aligncommand.h
alignmentdb.cpp
bayesian.cpp
ccode.cpp
chimera.cpp
chimeraccodecommand.cpp
chimeraccodecommand.h
chimeracheckcommand.cpp
chimeracheckcommand.h
chimerapintailcommand.cpp
chimerapintailcommand.h
chimeraslayer.cpp
chimeraslayercommand.cpp
chimeraslayercommand.h
chimerauchimecommand.cpp
classify.cpp
classifyseqscommand.cpp
classifyseqscommand.h
clearmemorycommand.cpp [new file with mode: 0644]
clearmemorycommand.h [new file with mode: 0644]
commandfactory.cpp
getsharedotucommand.cpp
makefile
mothur.cpp
mothurout.h
myutils.cpp
referencedb.cpp [new file with mode: 0644]
referencedb.h [new file with mode: 0644]
seqerrorcommand.cpp
seqerrorcommand.h
uchime_main.cpp

index e920d62a02eb43c6f898d74b301014b006e5889d..c693c0746414d813cbd59824db42f972e96d5d31 100644 (file)
@@ -14,7 +14,9 @@
                A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
                A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
                A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
+               A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
                A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
+               A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
                A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; };
                A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; };
                A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; };
                A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = anosimcommand.h; sourceTree = "<group>"; };
                A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergegroupscommand.h; sourceTree = "<group>"; };
                A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
+               A721765513BB9F7D0014DAAE /* referencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = referencedb.h; sourceTree = "<group>"; };
+               A721765613BB9F7D0014DAAE /* referencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = referencedb.cpp; sourceTree = "<group>"; };
                A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
                A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
+               A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clearmemorycommand.h; sourceTree = "<group>"; };
+               A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearmemorycommand.cpp; sourceTree = "<group>"; };
                A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = "<group>"; };
                A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = "<group>"; };
                A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = "<group>"; };
                                A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */,
                                A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
                                A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */,
+                               A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */,
+                               A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */,
                                A7E9B69D12D37EC400DA6239 /* clustercommand.h */,
                                A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */,
                                A7E9B69F12D37EC400DA6239 /* clusterdoturcommand.h */,
                                A7E9B7A012D37EC400DA6239 /* qualityscores.h */,
                                A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */,
                                A7E9B7A412D37EC400DA6239 /* rabundvector.hpp */,
+                               A721765513BB9F7D0014DAAE /* referencedb.h */,
+                               A721765613BB9F7D0014DAAE /* referencedb.cpp */,
                                A7E9B7CB12D37EC400DA6239 /* reportfile.cpp */,
                                A7E9B7CC12D37EC400DA6239 /* reportfile.h */,
                                A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */,
                                A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
                                A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */,
                                A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */,
+                               A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */,
+                               A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */,
                        );
                        runOnlyForDeploymentPostprocessing = 0;
                };
                                GCC_OPTIMIZATION_LEVEL = 0;
                                GCC_PREPROCESSOR_DEFINITIONS = (
                                        "MOTHUR_FILES=\"\\\"../release\\\"\"",
-                                       "VERSION=\"\\\"1.20.0\\\"\"",
-                                       "RELEASE_DATE=\"\\\"6/20/2011\\\"\"",
+                                       "VERSION=\"\\\"1.20.2\\\"\"",
+                                       "RELEASE_DATE=\"\\\"7/05/2011\\\"\"",
                                );
                                GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
                                GCC_WARN_ABOUT_RETURN_TYPE = YES;
index 5da0ee8d0f091b8ce07acdedd48d2069e53ec1e9..e3db3dda8db657d23e671cc704e90f78833cfca2 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "nast.hpp"
 #include "nastreport.hpp"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> AlignCommand::setParameters(){  
@@ -39,6 +40,7 @@ vector<string> AlignCommand::setParameters(){
                CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                CommandParameter pthreshold("threshold", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(pthreshold);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -69,6 +71,7 @@ string AlignCommand::getHelpString(){
                helpString += "The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold.  The default is false.";
                helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.";
                helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.";
                helpString += "The align.seqs command should be in the following format:";
                helpString += "align.seqs(reference=yourTemplateFile, fasta=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty)";
@@ -99,8 +102,9 @@ AlignCommand::AlignCommand(){
 //**********************************************************************************************************************
 AlignCommand::AlignCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
-       
+               abort = false; calledHelp = false;  
+               ReferenceDB* rdb = ReferenceDB::getInstance();
+               
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true;}
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -146,15 +150,6 @@ AlignCommand::AlignCommand(string option)  {
                                }
                        }
 
-                       //check for required parameters
-                       templateFileName = validParameter.validFile(parameters, "reference", true);
-                       
-                       if (templateFileName == "not found") { 
-                               m->mothurOut("reference is a required parameter for the align.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }else if (templateFileName == "not open") { abort = true; }     
-                       
                        candidateFileName = validParameter.validFile(parameters, "fasta", false);
                        if (candidateFileName == "not found") { 
                                //if there is a current fasta file, use it
@@ -259,6 +254,27 @@ AlignCommand::AlignCommand(string option)  {
                        temp = validParameter.validFile(parameters, "flip", false);                     if (temp == "not found"){       temp = "f";                             }
                        flip = m->isTrue(temp); 
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templateFileName = validParameter.validFile(parameters, "reference", true);
+                       if (templateFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templateFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the align.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templateFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
+                       
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found"){       temp = "0.50";                  }
                        convert(temp, threshold); 
                        
index 952b9364dda6fe79e162e6577fb2d7c0e251c5b4..75b9c2ba8ffa727b84dadbc4ebdd25479e11674c 100644 (file)
@@ -61,7 +61,7 @@ private:
        vector<string> candidateFileNames;
        vector<string> outputNames;
        
-       bool abort, flip, calledHelp;
+       bool abort, flip, calledHelp, save;
 
 };
 
index 6909f47b1235c72b67ae409907af97346dfa8007..0fd2ac4dca4ff4fcfc770488ab96f40dc21c38d7 100644 (file)
@@ -11,6 +11,7 @@
 #include "kmerdb.hpp"
 #include "suffixdb.hpp"
 #include "blastdb.hpp"
+#include "referencedb.h"
 
 
 /**************************************************************************************************/
@@ -20,91 +21,118 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                longest = 0;
                method = s;
                bool needToGenerate = true;
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
-               m->mothurOutEndLine();
-               m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
+               if (fastaFileName == "saved") {
+                       int start = time(NULL);
+                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");        m->mothurOutEndLine();
+
+                       for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+                               templateSequences.push_back(rdb->referenceSeqs[i]);
+                               //save longest base
+                               if (rdb->referenceSeqs[i].getUnaligned().length() >= longest)  { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
+                       }
+                       fastaFileName = rdb->getSavedReference();
+                       
+                       numSeqs = templateSequences.size();
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
+
+               }else {
+                       int start = time(NULL);
+                       m->mothurOutEndLine();
+                       m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
+                       
+                       #ifdef USE_MPI  
+                               int pid, processors;
+                               vector<unsigned long int> positions;
+                       
+                               MPI_Status status; 
+                               MPI_File inMPI;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors);
+                               int tag = 2001;
                
-               #ifdef USE_MPI  
-                       int pid, processors;
-                       vector<unsigned long int> positions;
+                               char inFileName[1024];
+                               strcpy(inFileName, fastaFileName.c_str());
                
-                       MPI_Status status; 
-                       MPI_File inMPI;
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
-                       int tag = 2001;
-       
-                       char inFileName[1024];
-                       strcpy(inFileName, fastaFileName.c_str());
-       
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               
+                               if (pid == 0) {
+                                       positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
+                               }else{
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       positions.resize(numSeqs+1);
+                                       MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                               }
+                       
+                               //read file 
+                               for(int i=0;i<numSeqs;i++){
+                                       
+                                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+                                       
+                                       //read next sequence
+                                       int length = positions[i+1] - positions[i];
+                                       char* buf4 = new char[length];
+                               
+                                       MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
                        
-                       if (pid == 0) {
-                               positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+                                       string tempBuf = buf4;
+                                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                                       delete buf4;
 
-                               //send file positions to all processes
-                               for(int i = 1; i < processors; i++) { 
-                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       istringstream iss (tempBuf,istringstream::in);
+                       
+                                       Sequence temp(iss);  
+                                       if (temp.getName() != "") {
+                                               templateSequences.push_back(temp);
+                                               
+                                               if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                               
+                                               //save longest base
+                                               if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
+                                       }
                                }
-                       }else{
-                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                               positions.resize(numSeqs+1);
-                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-                       }
-               
-                       //read file 
-                       for(int i=0;i<numSeqs;i++){
                                
-                               if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
-                               //read next sequence
-                               int length = positions[i+1] - positions[i];
-                               char* buf4 = new char[length];
+                               MPI_File_close(&inMPI);
                        
-                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
-               
-                               string tempBuf = buf4;
-                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
-                               delete buf4;
+               #else
+                       ifstream fastaFile;
+                       m->openInputFile(fastaFileName, fastaFile);
 
-                               istringstream iss (tempBuf,istringstream::in);
-               
-                               Sequence temp(iss);  
+                       while (!fastaFile.eof()) {
+                               Sequence temp(fastaFile);  m->gobble(fastaFile);
+                               
+                               if (m->control_pressed) {  templateSequences.clear(); break;  }
+                               
                                if (temp.getName() != "") {
                                        templateSequences.push_back(temp);
+                                       
+                                       if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                       
                                        //save longest base
-                                       if (temp.getUnaligned().length() >= longest)  { longest = temp.getUnaligned().length()+1; }
+                                       if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
                                }
                        }
-                       
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-                       
-                       MPI_File_close(&inMPI);
+                       fastaFile.close();
+               #endif
                
-       #else
-               ifstream fastaFile;
-               m->openInputFile(fastaFileName, fastaFile);
-
-               while (!fastaFile.eof()) {
-                       Sequence temp(fastaFile);  m->gobble(fastaFile);
-                       
-                       if (m->control_pressed) {  templateSequences.clear(); break;  }
+                       numSeqs = templateSequences.size();
+                       //all of this is elsewhere already!
                        
-                       if (temp.getName() != "") {
-                               templateSequences.push_back(temp);
-                               //save longest base
-                               if (temp.getUnaligned().length() >= longest)  { longest = (temp.getUnaligned().length()+1); }
-                       }
+                       m->mothurOut("DONE.");
+                       m->mothurOutEndLine();  cout.flush();
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to read  " + toString(rdb->referenceSeqs.size()) + " sequences."); m->mothurOutEndLine();  
+
                }
-               fastaFile.close();
-       #endif
-       
-               numSeqs = templateSequences.size();
-               //all of this is elsewhere already!
                
-               m->mothurOut("DONE.");
-               m->mothurOutEndLine();  cout.flush();
                
                //in case you delete the seqs and then ask for them
                emptySequence = Sequence();
@@ -156,6 +184,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                
                        search->setNumSeqs(numSeqs);
                }
+               
        }
        catch(exception& e) {
                m->errorOut(e, "AlignmentDB", "AlignmentDB");
@@ -192,7 +221,7 @@ Sequence AlignmentDB::findClosestSequence(Sequence* seq) {
        try{
        
                vector<int> spot = search->findClosestSequences(seq, 1);
-
+       
                if (spot.size() != 0)   {               return templateSequences[spot[0]];      }
                else                                    {               return emptySequence;                           }
                
index cf55d5c585ee2c7c2ca6534f7192996f2ef34d02..0b55efd3258fe036ec0d12955786f6d61b798c5c 100644 (file)
 #include "bayesian.h"
 #include "kmer.hpp"
 #include "phylosummary.h"
-
+#include "referencedb.h"
 /**************************************************************************************************/
 Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : 
 Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
        try {
+               ReferenceDB* rdb = ReferenceDB::getInstance();
+               
+               string baseName = tempFile;
+               if (baseName == "saved") { baseName = rdb->getSavedReference(); }
+               
+               string baseTName = tfile;
+               if (baseTName == "saved") { baseTName = rdb->getSavedTaxonomy(); }
                
                /************calculate the probablity that each word will be in a specific taxonomy*************/
-               string tfileroot = tfile.substr(0,tfile.find_last_of(".")+1);
-               string tempfileroot = m->getRootName(m->getSimpleName(tempFile));
+               string tfileroot = baseTName.substr(0,baseTName.find_last_of(".")+1);
+               string tempfileroot = m->getRootName(m->getSimpleName(baseName));
                string phyloTreeName = tfileroot + "tree.train";
                string phyloTreeSumName = tfileroot + "tree.sum";
                string probFileName = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.prob";
@@ -40,7 +47,23 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                        FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
                }
                
+               //if you want to save, but you dont need to calculate then just read
+               if (rdb->save && probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood) {  
+                       ifstream saveIn;
+                       m->openInputFile(tempFile, saveIn);
+                       
+                       while (!saveIn.eof()) {
+                               Sequence temp(saveIn);
+                               m->gobble(saveIn);
+                               
+                               rdb->referenceSeqs.push_back(temp); 
+                       }
+                       saveIn.close();                 
+               }
+               
                if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){       
+                       if (tempFile == "saved") { m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");     m->mothurOutEndLine(); }
+                       
                        m->mothurOut("Reading template taxonomy...     "); cout.flush();
                        
                        phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
@@ -49,10 +72,17 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                        
                        genusNodes = phyloTree->getGenusNodes(); 
                        genusTotals = phyloTree->getGenusTotals();
-               
-                       m->mothurOut("Reading template probabilities...     "); cout.flush();
-                       readProbFile(probFileTest, probFileTest2, probFileName, probFileName2); 
                        
+                       if (tfile == "saved") { 
+                               m->mothurOutEndLine();  m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory...    ");        cout.flush();; 
+                               wordGenusProb = rdb->wordGenusProb;
+                       }else {
+                               m->mothurOut("Reading template probabilities...     "); cout.flush();
+                               readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
+                       }       
+                       
+                       //save probabilities
+                       if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
                }else{
                
                        //create search database and names vector
@@ -194,6 +224,9 @@ Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i)  {
                                delete phyloTree;
                                
                                phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
+                               
+                               //save probabilities
+                               if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
                        }
                }
        
index ee88ba1c84838b2b9d6e4edaef2899ca81d31dee..00cd3f1948e009a86456e32bbcde2a40c6f889ac 100644 (file)
--- a/ccode.cpp
+++ b/ccode.cpp
 #include "ignoregaps.h"
 #include "eachgapdist.h"
 
-
 //***************************************************************************************************************
 Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {  
+ try { 
+       
        fastafile = filename;  
        outputDir = o; 
-       templateFileName = temp;  templateSeqs = readSeqs(temp);
+       templateFileName = temp;  templateSeqs = readSeqs(temp); 
        setMask(mask);
        filter = f;
        window = win;
@@ -63,6 +64,11 @@ Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int num
                out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
                out2.close();
        #endif
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Ccode", "Ccode");
+               exit(1);
+       }
 }
 //***************************************************************************************************************
 Ccode::~Ccode() {
index 54124872b7a5e87ce4aa545585fcb4b94c759044..ed843971ac7d4ab229595262631d548494f27d1c 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "chimera.h"
+#include "referencedb.h"
 
 //***************************************************************************************************************
 //this is a vertical soft filter
@@ -94,99 +95,123 @@ map<int, int> Chimera::runFilter(Sequence* seq) {
 //***************************************************************************************************************
 vector<Sequence*> Chimera::readSeqs(string file) {
        try {
-       
+               
                vector<Sequence*> container;
                int count = 0;
                length = 0;
                unaligned = false;
-
-               m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
-               #ifdef USE_MPI  
-                       int pid, processors;
-                       vector<unsigned long int> positions;
-                       int numSeqs;
-                       int tag = 2001;
-               
-                       MPI_Status status; 
-                       MPI_File inMPI;
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
-                       //char* inFileName = new char[file.length()];
-                       //memcpy(inFileName, file.c_str(), file.length());
+               if (file == "saved") {
                        
-                       char inFileName[1024];
-                       strcpy(inFileName, file.c_str());
-       
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       //delete inFileName;
-
-                       if (pid == 0) {
-                               positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
-
-                               //send file positions to all processes
-                               for(int i = 1; i < processors; i++) { 
-                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
-                               }
-                       }else{
-                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                               positions.resize(numSeqs+1);
-                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                       
+                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");        m->mothurOutEndLine();
+                       
+                       for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+                               Sequence* temp = new Sequence(rdb->referenceSeqs[i].getName(), rdb->referenceSeqs[i].getAligned());
+                               
+                               if (count == 0) {  length = temp->getAligned().length();  count++;  } //gets first seqs length
+                               else if (length != temp->getAligned().length()) {       unaligned = true;       }
+                               
+                               if (temp->getName() != "") {  container.push_back(temp);  }
                        }
                        
-                       //read file 
-                       for(int i=0;i<numSeqs;i++){
+                       templateFileName = rdb->getSavedReference();
                        
-                               if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
-       
-                               //read next sequence
-                               int seqlength = positions[i+1] - positions[i];
-                               char* buf4 = new char[seqlength];
+               }else {
+                       
+                       m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+                       
+                       #ifdef USE_MPI  
+                               int pid, processors;
+                               vector<unsigned long int> positions;
+                               int numSeqs;
+                               int tag = 2001;
+                       
+                               MPI_Status status; 
+                               MPI_File inMPI;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors);
 
-                               MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+                               //char* inFileName = new char[file.length()];
+                               //memcpy(inFileName, file.c_str(), file.length());
                                
-                               string tempBuf = buf4;
-                               if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
-                               delete buf4;
+                               char inFileName[1024];
+                               strcpy(inFileName, file.c_str());
+               
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               //delete inFileName;
+
+                               if (pid == 0) {
+                                       positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
 
-                               istringstream iss (tempBuf,istringstream::in);
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
+                               }else{
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       positions.resize(numSeqs+1);
+                                       MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+                               }
+                               
+                               //read file 
+                               for(int i=0;i<numSeqs;i++){
+                               
+                                       if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
                
-                               Sequence* current = new Sequence(iss);   
-                               if (current->getName() != "") {
-                                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
-                                       else if (length != current->getAligned().length()) {    unaligned = true;       }
+                                       //read next sequence
+                                       int seqlength = positions[i+1] - positions[i];
+                                       char* buf4 = new char[seqlength];
+
+                                       MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+                                       
+                                       string tempBuf = buf4;
+                                       if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+                                       delete buf4;
+
+                                       istringstream iss (tempBuf,istringstream::in);
                        
+                                       Sequence* current = new Sequence(iss);   
+                                       if (current->getName() != "") {
+                                               if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                                               else if (length != current->getAligned().length()) {    unaligned = true;       }
+                               
+                                               container.push_back(current);  
+                                               if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
+                                       }
+                               }
+                               
+                               MPI_File_close(&inMPI);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               #else
+
+                       ifstream in;
+                       m->openInputFile(file, in);
+                       
+                       //read in seqs and store in vector
+                       while(!in.eof()){
+                               
+                               if (m->control_pressed) { return container; }
+                               
+                               Sequence* current = new Sequence(in);  m->gobble(in);
+                               
+                               if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
+                               else if (length != current->getAligned().length()) {    unaligned = true;       }
+                                                       
+                               if (current->getName() != "") {  
                                        container.push_back(current);  
+                                       if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
                                }
                        }
-                       
-                       MPI_File_close(&inMPI);
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-       #else
-
-               ifstream in;
-               m->openInputFile(file, in);
+                       in.close();
+               #endif
                
-               //read in seqs and store in vector
-               while(!in.eof()){
+                       m->mothurOut("Done."); m->mothurOutEndLine();
                        
-                       if (m->control_pressed) { return container; }
-                       
-                       Sequence* current = new Sequence(in);  m->gobble(in);
-                       
-                       if (count == 0) {  length = current->getAligned().length();  count++;  } //gets first seqs length
-                       else if (length != current->getAligned().length()) {    unaligned = true;       }
-                                               
-                       if (current->getName() != "") {  container.push_back(current);  }
+                       filterString = (string(container[0]->getAligned().length(), '1'));
                }
-               in.close();
-       #endif
-       
-               m->mothurOut("Done."); m->mothurOutEndLine();
-               
-               filterString = (string(container[0]->getAligned().length(), '1'));
                
                return container;
        }
index bc96e0530993e63cf099381a1ccd8829f9cc7714..fd80ec965b4040f78dce01579e1974760fbb2a89 100644 (file)
@@ -9,6 +9,7 @@
 
 #include "chimeraccodecommand.h"
 #include "ccode.h"
+#include "referencedb.h"
 //**********************************************************************************************************************
 vector<string> ChimeraCcodeCommand::setParameters(){   
        try {
@@ -21,6 +22,7 @@ vector<string> ChimeraCcodeCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
@@ -49,6 +51,7 @@ string ChimeraCcodeCommand::getHelpString(){
                helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
                helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
                helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.ccode command should be in the following format: \n";
                helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
                helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
@@ -79,6 +82,7 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(){
 ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -197,10 +201,6 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
-
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true;  }            
                        
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
@@ -230,6 +230,28 @@ ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
                        
                        temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "20"; }
                        convert(temp, numwanted);
+                       
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
 
                }
        }
index 2bf7a70fd159a93fcc813c3236aed97a8c18c916..87f5ee87f7df50625be6a7a6407070e73dfc5b8c 100644 (file)
@@ -50,7 +50,7 @@ private:
        int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
        #endif
 
-       bool abort, filter;
+       bool abort, filter, save;
        string fastafile, templatefile, outputDir, maskfile;
        int processors, window, numwanted, numSeqs, templateSeqsLength;
        Chimera* chimera;
index 5b249d7ff6892a53ce0493c3fa5319468681a8bd..584e9874fd4cea4136d45c184a4c0662973c3b25 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "chimeracheckcommand.h"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraCheckCommand::setParameters(){   
@@ -21,7 +22,8 @@ vector<string> ChimeraCheckCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -50,6 +52,7 @@ string ChimeraCheckCommand::getHelpString(){
                helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n";
                helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n";
                helpString += "You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.check command should be in the following format: \n";
                helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n";
                helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n";
@@ -77,7 +80,8 @@ ChimeraCheckCommand::ChimeraCheckCommand(){
 //***************************************************************************************************************
 ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false;  
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -195,10 +199,6 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
-
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true;  }   
                        
                        namefile = validParameter.validFile(parameters, "name", false);
                        if (namefile == "not found") { namefile = ""; }
@@ -283,6 +283,28 @@ ChimeraCheckCommand::ChimeraCheckCommand(string option)  {
                        m->setProcessors(temp);
                        convert(temp, processors);
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
+                       
                        temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                        convert(temp, ksize);
                        
index d29744ba39ba1791e27c306c7fd09f950ac15c0f..350e10ea4bbbfc5bee07906e93edb029e996dc5f 100644 (file)
@@ -53,7 +53,7 @@ private:
        int driverMPI(int, int, MPI_File&, MPI_File&, vector<unsigned long int>&);
        #endif
 
-       bool abort, svg;
+       bool abort, svg, save;
        string fastafile, templatefile, namefile, outputDir;
        int processors, increment, ksize, numSeqs, templateSeqsLength;
        Chimera* chimera;
index 482bf7b5c2a67e57aaefc4cf4fc93c76a2b78611..66fe65d2318642610d77e1325c96c1fb2135ac1f 100644 (file)
@@ -10,6 +10,7 @@
 #include "chimerapintailcommand.h"
 #include "pintail.h"
 
+
 //**********************************************************************************************************************
 vector<string> ChimeraPintailCommand::setParameters(){ 
        try {
@@ -24,7 +25,8 @@ vector<string> ChimeraPintailCommand::setParameters(){
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -50,6 +52,7 @@ string ChimeraPintailCommand::getHelpString(){
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
                helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
                helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
@@ -83,6 +86,7 @@ ChimeraPintailCommand::ChimeraPintailCommand(){
 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -229,6 +233,28 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
                        convert(temp, increment);
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templatefile = validParameter.validFile(parameters, "reference", true);
+                       if (templatefile == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templatefile == "not open") { abort = true; } 
+                       else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
+                       
+                       
                        maskfile = validParameter.validFile(parameters, "mask", false);
                        if (maskfile == "not found") { maskfile = "";  }        
                        else if (maskfile != "default")  { 
@@ -274,10 +300,6 @@ ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                
-                       templatefile = validParameter.validFile(parameters, "reference", true);
-                       if (templatefile == "not open") { abort = true; }
-                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
-                       
                        consfile = validParameter.validFile(parameters, "conservation", true);
                        if (consfile == "not open") { abort = true; }
                        else if (consfile == "not found") { 
@@ -329,15 +351,18 @@ int ChimeraPintailCommand::execute(){
                        if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
                        
                        //check for quantile to save the time
+                       string baseName = templatefile;
+                       if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
+                       
                        string tempQuan = "";
                        if ((!filter) && (maskfile == "")) {
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
                        }else if ((!filter) && (maskfile != "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
                        }else if ((filter) && (maskfile != "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
                        }else if ((filter) && (maskfile == "")) { 
-                               tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+                               tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
                        }
                        
                        ifstream FileTest(tempQuan.c_str());
@@ -350,13 +375,13 @@ int ChimeraPintailCommand::execute(){
                                string tryPath = m->getDefaultPath();
                                string tempQuan = "";
                                if ((!filter) && (maskfile == "")) {
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
                                }else if ((!filter) && (maskfile != "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
                                }else if ((filter) && (maskfile != "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
                                }else if ((filter) && (maskfile == "")) { 
-                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+                                       tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
                                }
                                
                                ifstream FileTest2(tempQuan.c_str());
index acb078173501a7d7cf48cc88f5c305e12eccc225..9e1d6f8d253e61cc22e208eaf4d68259c06d7b02 100644 (file)
@@ -13,7 +13,7 @@
 #include "mothur.h"
 #include "command.hpp"
 #include "chimera.h"
-
+#include "referencedb.h"
 
 /***********************************************************/
 
@@ -35,7 +35,8 @@ public:
        int execute(); 
        void help() { m->mothurOut(getHelpString()); }          
 private:
-
+       ReferenceDB* rdb;
+       
        struct linePair {
                unsigned long int start;
                unsigned long int end;
@@ -52,7 +53,7 @@ private:
        int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
        #endif
 
-       bool abort, filter;
+       bool abort, filter, save;
        string fastafile, templatefile, consfile, quanfile, maskfile, outputDir, inputDir;
        int processors, window, increment, numSeqs, templateSeqsLength;
        Chimera* chimera;
index 5d714fd1ee88ff18a00619e96ae253847883a151..57528b4026e538f4b9ad85b706182e162bca2567 100644 (file)
@@ -44,6 +44,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num
        }
 }
 //***************************************************************************************************************
+//template=self
 ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div, 
                                                         int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera()  {      
        try {
@@ -256,6 +257,11 @@ vector<Sequence*> ChimeraSlayer::getTemplate(Sequence q, vector<Sequence*>& user
                        }
                }
                
+               //avoids nuisance error from formatdb for making blank blast database
+               if (userTemplate.size() == 0) {
+                       return userTemplate;
+               }
+               
                string  kmerDBNameLeft;
                string  kmerDBNameRight;
                
@@ -749,7 +755,6 @@ int ChimeraSlayer::getChimeras(Sequence* query) {
                else {  thisTemplate = getTemplate(*query, thisFilteredTemplate);  } //fills this template and creates the databases
                
                if (m->control_pressed) {  return 0;  }
-               
                if (thisTemplate.size() == 0) {  return 0; } //not chimeric
                
                //moved this out of maligner - 4/29/11
index 636e17536a024f04476fc04ce431bb6524e29839..c846a83155b75443d68803443ca9db904467bcc6 100644 (file)
@@ -10,6 +10,7 @@
 #include "chimeraslayercommand.h"
 #include "chimeraslayer.h"
 #include "deconvolutecommand.h"
+#include "referencedb.h"
 
 //**********************************************************************************************************************
 vector<string> ChimeraSlayerCommand::setParameters(){  
@@ -37,7 +38,8 @@ vector<string> ChimeraSlayerCommand::setParameters(){
                CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-               
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -55,7 +57,7 @@ string ChimeraSlayerCommand::getHelpString(){
                helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
                helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, and realign.\n";
                helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
-               helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
+               helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
                helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
                helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
                helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
@@ -79,6 +81,7 @@ string ChimeraSlayerCommand::getHelpString(){
                helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
                helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
                helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true.  \n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The chimera.slayer command should be in the following format: \n";
                helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
                helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
@@ -109,6 +112,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(){
 ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -296,12 +300,29 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
+                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
+                       
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
                        
                        string path;
                        it = parameters.find("reference");
                        //user has given a template file
                        if(it != parameters.end()){ 
-                               if (it->second == "self") { templatefile = "self"; }
+                               if (it->second == "self") { 
+                                       templatefile = "self"; 
+                                       if (save) {
+                                               m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
+                                               m->mothurOutEndLine();
+                                               save = false;
+                                       }
+                               }
                                else {
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
@@ -309,14 +330,34 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                                        
                                        templatefile = validParameter.validFile(parameters, "reference", true);
                                        if (templatefile == "not open") { abort = true; }
-                                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.slayer command, unless and namefile is given."); m->mothurOutEndLine(); abort = true;  }    
+                                       else if (templatefile == "not found") { //check for saved reference sequences
+                                               if (rdb->referenceSeqs.size() != 0) {
+                                                       templatefile = "saved";
+                                               }else {
+                                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                                       m->mothurOutEndLine();
+                                                       abort = true; 
+                                               }
+                                       }else { if (save) {     rdb->setSavedReference(templatefile);   }       }       
+                               }
+                       }else if (hasName) {  templatefile = "self"; 
+                               if (save) {
+                                       m->mothurOut("[WARNING]: You can't save reference=self, ignoring save."); 
+                                       m->mothurOutEndLine();
+                                       save = false;
                                }
-                       }else if (hasName) {  templatefile = "self"; }
-                       else { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true;  }   
+                       }
+                       else { 
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templatefile = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       templatefile = ""; abort = true; 
+                               } 
+                       }
+                       
                        
-                       string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
-                       m->setProcessors(temp);
-                       convert(temp, processors);
                        
                        temp = validParameter.validFile(parameters, "ksize", false);                    if (temp == "not found") { temp = "7"; }
                        convert(temp, ksize);
@@ -369,6 +410,8 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
                        convert(temp, numwanted);
 
                        if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true;  }
+                       
+                       if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                }
        }
        catch(exception& e) {
@@ -381,7 +424,7 @@ ChimeraSlayerCommand::ChimeraSlayerCommand(string option)  {
 int ChimeraSlayerCommand::execute(){
        try{
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
-               
+                       
                for (int s = 0; s < fastaFileNames.size(); s++) {
                                
                        m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
index 35ff7627552d2e2d15fe4e03adcd9c634e8b8b3c..cfa934abf6655e9605d104300b90e0d590ce179f 100644 (file)
@@ -52,7 +52,7 @@ private:
        int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
        #endif
 
-       bool abort, realign, trim, trimera;
+       bool abort, realign, trim, trimera, save;
        string fastafile, templatefile, outputDir, search, namefile;
        int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
        float divR;
index 05ddaf6459959ad39bb83720547a1b458c7ae807..b42a491fb4f8af800cb193eb71af2fa6692afe17 100644 (file)
@@ -11,6 +11,7 @@
 #include "deconvolutecommand.h"
 #include "uc.h"
 #include "sequence.hpp"
+#include "referencedb.h"
 
 
 //**********************************************************************************************************************
@@ -40,7 +41,7 @@ vector<string> ChimeraUchimeCommand::setParameters(){
                CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
                CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
                CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
-               
+
                vector<string> myArray;
                for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
@@ -112,7 +113,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(){
 //***************************************************************************************************************
 ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
        try {
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false; 
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -300,7 +302,6 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
                        
-                       
                        string path;
                        it = parameters.find("reference");
                        //user has given a template file
@@ -313,12 +314,29 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
                                        
                                        templatefile = validParameter.validFile(parameters, "reference", true);
                                        if (templatefile == "not open") { abort = true; }
-                                       else if (templatefile == "not found") { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true;  }  
+                                       else if (templatefile == "not found") { //check for saved reference sequences
+                                               if (rdb->getSavedReference() != "") {
+                                                       templatefile = rdb->getSavedReference();
+                                                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
+                                               }else {
+                                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                                       m->mothurOutEndLine();
+                                                       abort = true; 
+                                               }
+                                       }
                                }
                        }else if (hasName) {  templatefile = "self"; }
-                       else { templatefile = "";  m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true;  }       
-
-                       
+                       else { 
+                               if (rdb->getSavedReference() != "") {
+                                       templatefile = rdb->getSavedReference();
+                                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       templatefile = ""; abort = true; 
+                               } 
+                       }
+                               
                        string temp = validParameter.validFile(parameters, "processors", false);        if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        convert(temp, processors);
@@ -353,7 +371,8 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option)  {
 
                        temp = validParameter.validFile(parameters, "skipgaps2", false);                                if (temp == "not found") { temp = "t"; }
                        skipgaps2 = m->isTrue(temp); 
-
+                       
+                       if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
                }
        }
        catch(exception& e) {
index 925dedacbf0edee93af4d89b4cf6386169b6cfc0..7770999e779917996a916e87836a03c3a2a8f26b 100644 (file)
 #include "suffixdb.hpp"
 #include "blastdb.hpp"
 #include "distancedb.hpp"
+#include "referencedb.h"
 
 /**************************************************************************************************/
 void Classify::generateDatabaseAndNames(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch)  {            
        try {   
-               taxFile = tfile;
-               readTaxonomy(taxFile);  
+               ReferenceDB* rdb = ReferenceDB::getInstance();
                
-               templateFile = tempFile;        
+               if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
                
-               int start = time(NULL);
+               taxFile = tfile;
+               readTaxonomy(taxFile);  
                int numSeqs = 0;
                
-               m->mothurOut("Generating search database...    "); cout.flush();
-#ifdef USE_MPI 
-                       int pid, processors;
-                       vector<unsigned long int> positions;
-                       int tag = 2001;
-               
-                       MPI_Status status; 
-                       MPI_File inMPI;
-                       MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
-                       MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
-                       //char* inFileName = new char[tempFile.length()];
-                       //memcpy(inFileName, tempFile.c_str(), tempFile.length());
+               if (tempFile == "saved") {
+                       int start = time(NULL);
+                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");        m->mothurOutEndLine();
+                       
+                       numSeqs = rdb->referenceSeqs.size();
+                       templateFile = rdb->getSavedReference();
+                       tempFile = rdb->getSavedReference();
+                       
+                       bool needToGenerate = true;
+                       string kmerDBName;
+                       if(method == "kmer")                    {       
+                               database = new KmerDB(tempFile, kmerSize);                      
+                               
+                               kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               if(kmerFileTest){       
+                                       bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+                                       if (GoodFile) {  needToGenerate = false;        }
+                               }
+                       }
+                       else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
+                       else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
+                       else if(method == "distance")   {       database = new DistanceDB();    }
+                       else {
+                               m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+                               m->mothurOutEndLine();
+                               database = new KmerDB(tempFile, 8);
+                       }
+                       
+                       if (needToGenerate) {
+                               for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
+                                       Sequence temp(rdb->referenceSeqs[k].getName(), rdb->referenceSeqs[k].getAligned());
+                                       names.push_back(temp.getName());
+                                       database->addSequence(temp);    
+                               }
+                               database->generateDB();
+                       }else if ((method == "kmer") && (!needToGenerate)) {    
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               database->readKmerDB(kmerFileTest);     
+                               
+                               for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
+                                       names.push_back(rdb->referenceSeqs[k].getName());
+                               }                       
+                       }       
+                       
+                       database->setNumSeqs(numSeqs);
+                       
+                       //sanity check
+                       bool okay = phyloTree->ErrorCheck(names);
+                       
+                       if (!okay) { m->control_pressed = true; }
                        
-                       char inFileName[1024];
-                       strcpy(inFileName, tempFile.c_str());
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();  
+                       
+               }else {
+                       
+                       templateFile = tempFile;        
+                       
+                       int start = time(NULL);
+                       
+                       m->mothurOut("Generating search database...    "); cout.flush();
+       #ifdef USE_MPI  
+                               int pid, processors;
+                               vector<unsigned long int> positions;
+                               int tag = 2001;
+                       
+                               MPI_Status status; 
+                               MPI_File inMPI;
+                               MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+                               MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+                               //char* inFileName = new char[tempFile.length()];
+                               //memcpy(inFileName, tempFile.c_str(), tempFile.length());
+                               
+                               char inFileName[1024];
+                               strcpy(inFileName, tempFile.c_str());
 
-                       MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
-                       //delete inFileName;
+                               MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
+                               //delete inFileName;
 
-                       if (pid == 0) { //only one process needs to scan file
-                               positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
+                               if (pid == 0) { //only one process needs to scan file
+                                       positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
 
-                               //send file positions to all processes
-                               for(int i = 1; i < processors; i++) { 
-                                       MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
-                                       MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       //send file positions to all processes
+                                       for(int i = 1; i < processors; i++) { 
+                                               MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+                                               MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+                                       }
+                               }else{
+                                       MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+                                       positions.resize(numSeqs+1);
+                                       MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
                                }
-                       }else{
-                               MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
-                               positions.resize(numSeqs+1);
-                               MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
-                       }
+                               
+                               //create database
+                               if(method == "kmer")                    {       database = new KmerDB(tempFile, kmerSize);                      }
+                               else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
+                               else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
+                               else if(method == "distance")   {       database = new DistanceDB();    }
+                               else {
+                                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
+                                       database = new KmerDB(tempFile, 8);
+                               }
+
+                               //read file 
+                               for(int i=0;i<numSeqs;i++){
+                                       //read next sequence
+                                       int length = positions[i+1] - positions[i];
+                                       char* buf4 = new char[length];
+                                       MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+                                       
+                                       string tempBuf = buf4;
+                                       if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+                                       delete buf4;
+                                       istringstream iss (tempBuf,istringstream::in);
+                                       
+                                       Sequence temp(iss);  
+                                       if (temp.getName() != "") {
+                                               if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                               names.push_back(temp.getName());
+                                               database->addSequence(temp);    
+                                       }
+                               }
+                               
+                               database->generateDB();
+                               MPI_File_close(&inMPI);
+                               MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+               #else
                        
-                       //create database
-                       if(method == "kmer")                    {       database = new KmerDB(tempFile, kmerSize);                      }
+                       //need to know number of template seqs for suffixdb
+                       if (method == "suffix") {
+                               ifstream inFASTA;
+                               m->openInputFile(tempFile, inFASTA);
+                               m->getNumSeqs(inFASTA, numSeqs);
+                               inFASTA.close();
+                       }
+
+                       bool needToGenerate = true;
+                       string kmerDBName;
+                       if(method == "kmer")                    {       
+                               database = new KmerDB(tempFile, kmerSize);                      
+                               
+                               kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               if(kmerFileTest){       
+                                       bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+                                       if (GoodFile) {  needToGenerate = false;        }
+                               }
+                       }
                        else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
                        else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
                        else if(method == "distance")   {       database = new DistanceDB();    }
                        else {
-                               m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
+                               m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+                               m->mothurOutEndLine();
                                database = new KmerDB(tempFile, 8);
                        }
-
-                       //read file 
-                       for(int i=0;i<numSeqs;i++){
-                               //read next sequence
-                               int length = positions[i+1] - positions[i];
-                               char* buf4 = new char[length];
-                               MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
-                               
-                               string tempBuf = buf4;
-                               if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
-                               delete buf4;
-                               istringstream iss (tempBuf,istringstream::in);
+                       
+                       if (needToGenerate) {
+                               ifstream fastaFile;
+                               m->openInputFile(tempFile, fastaFile);
                                
-                               Sequence temp(iss);  
-                               if (temp.getName() != "") {
+                               while (!fastaFile.eof()) {
+                                       Sequence temp(fastaFile);
+                                       m->gobble(fastaFile);
+                                       
+                                       if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                       
                                        names.push_back(temp.getName());
+                                                               
                                        database->addSequence(temp);    
                                }
-                       }
-                       
-                       database->generateDB();
-                       MPI_File_close(&inMPI);
-                       MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-       #else
-               
-               //need to know number of template seqs for suffixdb
-               if (method == "suffix") {
-                       ifstream inFASTA;
-                       m->openInputFile(tempFile, inFASTA);
-                       m->getNumSeqs(inFASTA, numSeqs);
-                       inFASTA.close();
-               }
+                               fastaFile.close();
 
-               bool needToGenerate = true;
-               string kmerDBName;
-               if(method == "kmer")                    {       
-                       database = new KmerDB(tempFile, kmerSize);                      
+                               database->generateDB();
+                               
+                       }else if ((method == "kmer") && (!needToGenerate)) {    
+                               ifstream kmerFileTest(kmerDBName.c_str());
+                               database->readKmerDB(kmerFileTest);     
                        
-                       kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
-                       ifstream kmerFileTest(kmerDBName.c_str());
-                       if(kmerFileTest){       
-                               bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
-                               if (GoodFile) {  needToGenerate = false;        }
+                               ifstream fastaFile;
+                               m->openInputFile(tempFile, fastaFile);
+                               
+                               while (!fastaFile.eof()) {
+                                       Sequence temp(fastaFile);
+                                       m->gobble(fastaFile);
+                                       
+                                       if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+                                       names.push_back(temp.getName());
+                               }
+                               fastaFile.close();
                        }
-               }
-               else if(method == "suffix")             {       database = new SuffixDB(numSeqs);                                                               }
-               else if(method == "blast")              {       database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch);   }
-               else if(method == "distance")   {       database = new DistanceDB();    }
-               else {
-                       m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
-                       m->mothurOutEndLine();
-                       database = new KmerDB(tempFile, 8);
-               }
+       #endif  
                
-               if (needToGenerate) {
-                       ifstream fastaFile;
-                       m->openInputFile(tempFile, fastaFile);
+                       database->setNumSeqs(names.size());
                        
-                       while (!fastaFile.eof()) {
-                               Sequence temp(fastaFile);
-                               m->gobble(fastaFile);
+                       //sanity check
+                       bool okay = phyloTree->ErrorCheck(names);
                        
-                               names.push_back(temp.getName());
-                                                       
-                               database->addSequence(temp);    
-                       }
-                       fastaFile.close();
-
-                       database->generateDB();
+                       if (!okay) { m->control_pressed = true; }
                        
-               }else if ((method == "kmer") && (!needToGenerate)) {    
-                       ifstream kmerFileTest(kmerDBName.c_str());
-                       database->readKmerDB(kmerFileTest);     
-               
-                       ifstream fastaFile;
-                       m->openInputFile(tempFile, fastaFile);
-                       
-                       while (!fastaFile.eof()) {
-                               Sequence temp(fastaFile);
-                               m->gobble(fastaFile);
-
-                               names.push_back(temp.getName());
-                       }
-                       fastaFile.close();
+                       m->mothurOut("DONE."); m->mothurOutEndLine();
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
                }
-#endif 
-       
-               database->setNumSeqs(names.size());
-               
-               //sanity check
-               bool okay = phyloTree->ErrorCheck(names);
-               
-               if (!okay) { m->control_pressed = true; }
-               
-               m->mothurOut("DONE."); m->mothurOutEndLine();
-               m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
 
        }
        catch(exception& e) {
index 72ca1d00fed7815ef68a8f1dc321c8c911eaeff0..f1b3a0e60186261bff404cfa4e5a1605e1c38f08 100644 (file)
@@ -15,6 +15,7 @@
 #include "knn.h"
 
 
+
 //**********************************************************************************************************************
 vector<string> ClassifySeqsCommand::setParameters(){   
        try {
@@ -34,6 +35,7 @@ vector<string> ClassifySeqsCommand::setParameters(){
                CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
                CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
                CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
@@ -63,6 +65,7 @@ string ClassifySeqsCommand::getHelpString(){
 #ifdef USE_MPI
                helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
 #endif
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
                helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
@@ -103,6 +106,7 @@ ClassifySeqsCommand::ClassifySeqsCommand(){
 ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
        try {
                abort = false; calledHelp = false;   
+               rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -161,16 +165,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                }
                        }
 
-                       //check for required parameters
-                       templateFileName = validParameter.validFile(parameters, "reference", true);
-                       if (templateFileName == "not found") { 
-                               m->mothurOut("reference is a required parameter for the classify.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }
-                       else if (templateFileName == "not open") { abort = true; }      
-                       
-                                               
                        fastaFileName = validParameter.validFile(parameters, "fasta", false);
                        if (fastaFileName == "not found") {                             
                                //if there is a current fasta file, use it
@@ -250,16 +244,6 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                                if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
                        }
 
-                       
-                       taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
-                       if (taxonomyFileName == "not found") { 
-                               m->mothurOut("taxonomy is a required parameter for the classify.seqs command."); 
-                               m->mothurOutEndLine();
-                               abort = true; 
-                       }
-                       else if (taxonomyFileName == "not open") { abort = true; }      
-                       
-                       
                        namefile = validParameter.validFile(parameters, "name", false);
                        if (namefile == "not found") { namefile = "";  }
 
@@ -397,6 +381,41 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option)  {
                        temp = validParameter.validFile(parameters, "ksize", false);            if (temp == "not found"){       temp = "8";                             }
                        convert(temp, kmerSize); 
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       templateFileName = validParameter.validFile(parameters, "reference", true);
+                       if (templateFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       templateFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (templateFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedReference(templateFileName);       }       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+                       if (taxonomyFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->wordGenusProb.size() != 0) {
+                                       taxonomyFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (taxonomyFileName == "not open") { abort = true; }     
+                       else {  if (save) {     rdb->setSavedTaxonomy(taxonomyFileName);        }       }
+                       
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
                        convert(temp, processors); 
@@ -471,7 +490,10 @@ int ClassifySeqsCommand::execute(){
                
                        m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
                        
-                       string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFileName));
+                       string baseTName = taxonomyFileName;
+                       if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy();  }
+                       
+                       string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
                        RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
                        if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
                        RippedTaxName +=  "."; 
@@ -636,7 +658,7 @@ int ClassifySeqsCommand::execute(){
                        string group = "";
                        if (groupfile != "") {  group = groupfileNames[s]; }
                        
-                       PhyloSummary taxaSum(taxonomyFileName, group);
+                       PhyloSummary taxaSum(baseTName, group);
                        
                        if (m->control_pressed) { outputTypes.clear();  for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); } delete classify; return 0; }
                
index 9c8dbcac9caf436fcf44b2c416a9e063509561aa..3dbf06ee5bf18c5f6c5ee96e717ec4348c919310 100644 (file)
@@ -13,6 +13,7 @@
 #include "mothur.h"
 #include "command.hpp"
 #include "classify.h"
+#include "referencedb.h"
 
 //KNN and Bayesian methods modeled from algorithms in
 //Naı¨ve Bayesian Classifier for Rapid Assignment of rRNA Sequences 
@@ -60,11 +61,12 @@ private:
        map<string,  vector<string> >::iterator itNames;
        
        Classify* classify;
+       ReferenceDB* rdb;
        
        string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
        int processors, kmerSize, numWanted, cutoff, iters;
        float match, misMatch, gapOpen, gapExtend;
-       bool abort, probs;
+       bool abort, probs, save;
        
        int driver(linePair*, string, string, string);
        void appendTaxFiles(string, string);
diff --git a/clearmemorycommand.cpp b/clearmemorycommand.cpp
new file mode 100644 (file)
index 0000000..abdc555
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ *  clearmemorycommand.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 7/6/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clearmemorycommand.h"
+#include "referencedb.h"
+
+//**********************************************************************************************************************
+vector<string> ClearMemoryCommand::setParameters(){    
+       try {
+               vector<string> myArray;
+               return myArray;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClearMemoryCommand", "setParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+string ClearMemoryCommand::getHelpString(){    
+       try {
+               string helpString = "";
+               helpString += "The clear.memory command removes saved reference data from memory.\n";
+               helpString += "The clear.memory command should be in the following format: clear.memory().\n";
+               return helpString;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClearMemoryCommand", "getHelpString");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+
+ClearMemoryCommand::ClearMemoryCommand(string option)  {
+       try {
+               abort = false; calledHelp = false;   
+               
+               //allow user to run help
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClearMemoryCommand", "ClearMemoryCommand");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+
+int ClearMemoryCommand::execute(){
+       try {
+               
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
+               
+               ReferenceDB* rdb = ReferenceDB::getInstance();
+               rdb->clearMemory();
+               
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ClearMemoryCommand", "execute");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************/
diff --git a/clearmemorycommand.h b/clearmemorycommand.h
new file mode 100644 (file)
index 0000000..9017561
--- /dev/null
@@ -0,0 +1,39 @@
+#ifndef CLEARMEMORYCOMMAND_H
+#define CLEARMEMORYCOMMAND_H
+
+/*
+ *  clearmemorycommand.h
+ *  Mothur
+ *
+ *  Created by westcott on 7/6/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+class ClearMemoryCommand : public Command {
+public:
+       ClearMemoryCommand(string);
+       ClearMemoryCommand(){ abort = true; calledHelp = true; }
+       ~ClearMemoryCommand(){}
+       
+       vector<string> setParameters();
+       string getCommandName()                 { return "clear.memory";                        }
+       string getCommandCategory()             { return "General";     }
+       string getHelpString(); 
+       string getCitation() { return "http://www.mothur.org/wiki/Clear.memory"; }
+       string getDescription()         { return "remove saved references from memory"; }
+       
+       
+       int execute(); 
+       void help() { m->mothurOut(getHelpString()); }  
+       
+       
+private:
+       bool abort;
+       vector<string> outputNames;
+};
+
+#endif
+
index 620536d8628931d8b9f9a5db1132ab04e28f456f..5aaa7b7b120ffdd5b0258b16643ce4bad89be93e 100644 (file)
 #include "getcommandinfocommand.h"
 #include "deuniquetreecommand.h"
 #include "countseqscommand.h"
+#include "clearmemorycommand.h"
 
 /*******************************************************/
 
@@ -243,6 +244,7 @@ CommandFactory::CommandFactory(){
        commands["get.commandinfo"]             = "get.commandinfo";
        commands["deunique.tree"]               = "deunique.tree";
        commands["count.seqs"]                  = "count.seqs";
+       commands["clear.memory"]                = "clear.memory";
        commands["pairwise.seqs"]               = "MPIEnabled";
        commands["pipeline.pds"]                = "MPIEnabled";
        commands["classify.seqs"]               = "MPIEnabled"; 
@@ -415,6 +417,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "get.commandinfo")               {       command = new GetCommandInfoCommand(optionString);                      }
                else if(commandName == "deunique.tree")                 {       command = new DeuniqueTreeCommand(optionString);                        }
                else if(commandName == "count.seqs")                    {       command = new CountSeqsCommand(optionString);                           }
+               else if(commandName == "clear.memory")                  {       command = new ClearMemoryCommand(optionString);                         }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -553,6 +556,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
                else if(commandName == "get.commandinfo")               {       pipecommand = new GetCommandInfoCommand(optionString);                  }
                else if(commandName == "deunique.tree")                 {       pipecommand = new DeuniqueTreeCommand(optionString);                    }
                else if(commandName == "count.seqs")                    {       pipecommand = new CountSeqsCommand(optionString);                               }
+               else if(commandName == "clear.memory")                  {       pipecommand = new ClearMemoryCommand(optionString);                             }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -679,6 +683,7 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "get.commandinfo")               {       shellcommand = new GetCommandInfoCommand();                     }
                else if(commandName == "deunique.tree")                 {       shellcommand = new DeuniqueTreeCommand();                       }
                else if(commandName == "count.seqs")                    {       shellcommand = new CountSeqsCommand();                          }
+               else if(commandName == "clear.memory")                  {       shellcommand = new ClearMemoryCommand();                        }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 06e199f7a93c96e71ba5cb15f1cafd21c8ca2d8e..046460d0f1d11f345c14ddd99b687fddbafa8ec9 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include "getsharedotucommand.h"
+#include "sharedutilities.h"
 
 //**********************************************************************************************************************
 vector<string> GetSharedOTUCommand::setParameters(){   
@@ -226,6 +227,10 @@ int GetSharedOTUCommand::execute(){
                        userGroups = "unique.";
                        for(int i = 0; i < Groups.size(); i++) {  userGroups += Groups[i] + "-";  }
                        userGroups = userGroups.substr(0, userGroups.length()-1);
+               }else{
+                       //sanity check for group names
+                       SharedUtil util;
+                       util.setGroups(Groups, groupMap->namesOfGroups);
                }
        
                //put groups in map to find easier
index df165b3e4195420a3543c352cd3d02c8df6b7dbc..fa214c476e15f32fb6be29cfda677a064de6e1e1 100644 (file)
--- a/makefile
+++ b/makefile
@@ -14,9 +14,9 @@ USEMPI ?= no
 USEREADLINE ?= yes
 CYGWIN_BUILD ?= no
 USECOMPRESSION ?= no
-MOTHUR_FILES="\"../release\""
-RELEASE_DATE = "\"2/7/2011\""
-VERSION = "\"1.16.0\""
+MOTHUR_FILES="\"Enter_your_default_path_here\""
+RELEASE_DATE = "\"7/5/2011\""
+VERSION = "\"1.20.0\""
 
 # Optimize to level 3:
 CXXFLAGS += -O3
index b551302646c10b813740516eaa65cc46453b4a1d..5838811ac62648bfbf0b6da18ef715d2010b73c6 100644 (file)
 #include "mothur.h"
 #include "engine.hpp"
 #include "mothurout.h"
+#include "referencedb.h"
 
 /**************************************************************************************************/
 
 CommandFactory* CommandFactory::_uniqueInstance = 0;
 MothurOut* MothurOut::_uniqueInstance = 0;
-
+ReferenceDB* ReferenceDB::myInstance = 0;
 /***********************************************************************/
 volatile int ctrlc_pressed = 0;
 void ctrlc_handler ( int sig ) {
index 7bbd315b0420ce2a4d42d759aa244303f4998f9d..f68e45324d0939182be7eb1b4b373b2c502d5c80 100644 (file)
@@ -12,7 +12,6 @@
 
 #include "mothur.h"
 
-
 /***********************************************/
 
 class MothurOut {
@@ -183,7 +182,7 @@ class MothurOut {
                        gui = false;
                        printedHeaders = false;
                        sharedHeaderMode = "";
-               };
+               }
                ~MothurOut();
 
                string logFileName;
index d0d3817b631e7f0e0f4a48ec349ccabd6cdd8894..786316e13434c07f9844cb55be24e12dae84b292 100755 (executable)
@@ -954,7 +954,7 @@ void ProgressExit()
 // Skip exit(), which can be very slow in DEBUG build\r
 // VERY DANGEROUS practice, because it skips global destructors.\r
 // But if you know the rules, you can break 'em, right?\r
-       ExitProcess(0);\r
+       //ExitProcess(0);\r
 #endif\r
        }\r
 \r
@@ -1531,7 +1531,7 @@ static void GetArgsFromFile(const string &FileName, vector<string> &Args)
 \r
 void MyCmdLine(int argc, char **argv)\r
        {\r
-       g_Opts.clear();\r
+       g_Opts.clear(); g_Argv.clear();\r
        static unsigned RecurseDepth = 0;\r
        ++RecurseDepth;\r
 \r
diff --git a/referencedb.cpp b/referencedb.cpp
new file mode 100644 (file)
index 0000000..fb930ef
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ *  referencedb.cpp
+ *  Mothur
+ *
+ *  Created by westcott on 6/29/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "referencedb.h"
+
+/******************************************************/
+ReferenceDB* ReferenceDB::getInstance()  {
+        if(myInstance == NULL) {
+               myInstance = new ReferenceDB();
+        }
+        return myInstance;
+ }
+/******************************************************/
+void ReferenceDB::clearMemory()  {
+       referenceSeqs.clear();  
+       setSavedReference("");
+       for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
+       wordGenusProb.clear();
+       setSavedTaxonomy("");
+}
+/*******************************************************
+ReferenceDB::~ReferenceDB() { myInstance = NULL; }
+/*******************************************************/
+
diff --git a/referencedb.h b/referencedb.h
new file mode 100644 (file)
index 0000000..2f29247
--- /dev/null
@@ -0,0 +1,48 @@
+#ifndef MYREFERENCEDB_H
+#define MYREFERENCEDB_H
+
+/*
+ *  referencedb.h
+ *  Mothur
+ *
+ *  Created by westcott on 6/29/11.
+ *  Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "sequence.hpp"
+
+/***********************************************/
+
+class ReferenceDB {
+       
+       public:
+       
+               static ReferenceDB* getInstance();
+               void clearMemory();
+       
+               bool save;
+               vector<Sequence> referenceSeqs;
+               vector< vector<float> > wordGenusProb;
+       
+               string getSavedReference()                      { return referencefile;         }
+               void setSavedReference(string p)        { referencefile = p;            }
+               string getSavedTaxonomy()                       { return taxonomyfile;          }
+               void setSavedTaxonomy(string p)         { taxonomyfile = p;                     }
+       
+       private:
+       
+               static ReferenceDB* myInstance;
+               ReferenceDB() { referencefile = ""; taxonomyfile = ""; save = false; }
+               ReferenceDB(const ReferenceDB&){}// Disable copy constructor
+               void operator=(const ReferenceDB&){} // Disable assignment operator
+               ~ReferenceDB(){ myInstance = 0; }
+       
+               string referencefile, taxonomyfile;     
+};
+/***********************************************/
+
+#endif
+
index 007ac93df67d1e93371abd1898c9cce9c0823233..fc027b9261822880ac0afa1cc99929791a309df4 100644 (file)
@@ -13,6 +13,7 @@
 #include "refchimeratest.h"
 #include "filterseqscommand.h"
 
+
 //**********************************************************************************************************************
 vector<string> SeqErrorCommand::setParameters(){       
        try {
@@ -24,6 +25,7 @@ vector<string> SeqErrorCommand::setParameters(){
                CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pignorechimeras);
                CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pthreshold);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
@@ -41,6 +43,15 @@ string SeqErrorCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The seq.error command reads a query alignment file and a reference alignment file and creates .....\n";
+               helpString += "The fasta parameter...\n";
+               helpString += "The reference parameter...\n";
+               helpString += "The qfile parameter...\n";
+               helpString += "The report parameter...\n";
+               helpString += "The name parameter...\n";
+               helpString += "The ignorechimeras parameter...\n";
+               helpString += "The threshold parameter...\n";
+               helpString += "The processors parameter...\n";
+               helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
                helpString += "Example seq.error(...).\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n";
@@ -55,6 +66,7 @@ string SeqErrorCommand::getHelpString(){
 SeqErrorCommand::SeqErrorCommand(){    
        try {
                abort = true; calledHelp = true; 
+               setParameters();
                vector<string> tempOutNames;
                outputTypes["error.summary"] = tempOutNames;
                outputTypes["error.seq"] = tempOutNames;
@@ -77,6 +89,7 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
        try {
                
                abort = false; calledHelp = false;   
+               rdb = ReferenceDB::getInstance();
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
@@ -165,11 +178,6 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        }
                        else if (queryFileName == "not open") { abort = true; } 
                        else { m->setFastaFile(queryFileName); }
-                       
-                       referenceFileName = validParameter.validFile(parameters, "reference", true);
-                       if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
-                       else if (referenceFileName == "not open") { abort = true; }     
-                       
 
                        //check for optional parameters
                        namesFileName = validParameter.validFile(parameters, "name", true);
@@ -203,6 +211,28 @@ SeqErrorCommand::SeqErrorCommand(string option)  {
                        temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found") { temp = "1.00"; }
                        convert(temp, threshold);  
                        
+                       temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
+                       save = m->isTrue(temp); 
+                       rdb->save = save; 
+                       if (save) { //clear out old references
+                               rdb->clearMemory();     
+                       }
+                       
+                       //this has to go after save so that if the user sets save=t and provides no reference we abort
+                       referenceFileName = validParameter.validFile(parameters, "reference", true);
+                       if (referenceFileName == "not found") { 
+                               //check for saved reference sequences
+                               if (rdb->referenceSeqs.size() != 0) {
+                                       referenceFileName = "saved";
+                               }else {
+                                       m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
+                                       m->mothurOutEndLine();
+                                       abort = true; 
+                               }
+                       }else if (referenceFileName == "not open") { abort = true; }    
+                       else {  if (save) {     rdb->setSavedReference(referenceFileName);      }       }
+                       
+                       
                        temp = validParameter.validFile(parameters, "ignorechimeras", false);   if (temp == "not found") { temp = "T"; }
                        ignoreChimeras = m->isTrue(temp);
                        
@@ -728,32 +758,59 @@ int SeqErrorCommand::driver(string filename, string qFileName, string rFileName,
 
 void SeqErrorCommand::getReferences(){
        try {
-               
-               ifstream referenceFile;
-               m->openInputFile(referenceFileName, referenceFile);
-               
                int numAmbigSeqs = 0;
                
                int maxStartPos = 0;
                int minEndPos = 100000;
                
-               while(referenceFile){
-                       Sequence currentSeq(referenceFile);
-                       int numAmbigs = currentSeq.getAmbigBases();
-                       if(numAmbigs > 0){      numAmbigSeqs++; }
+               if (referenceFileName == "saved") {
+                       int start = time(NULL);
+                       m->mothurOutEndLine();  m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory.");        m->mothurOutEndLine();
+                       
+                       for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+                               int numAmbigs = rdb->referenceSeqs[i].getAmbigBases();
+                               if(numAmbigs > 0){      numAmbigSeqs++; }
+                               
+                               //                      int startPos = rdb->referenceSeqs[i].getStartPos();
+                               //                      if(startPos > maxStartPos)      {       maxStartPos = startPos; }
+                               //
+                               //                      int endPos = rdb->referenceSeqs[i].getEndPos();
+                               //                      if(endPos < minEndPos)          {       minEndPos = endPos;             }                               
+                               
+                               referenceSeqs.push_back(rdb->referenceSeqs[i]);
+                       }
+                       referenceFileName = rdb->getSavedReference();
+                       
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
+               
+               }else {
+                       int start = time(NULL);
+
+                       ifstream referenceFile;
+                       m->openInputFile(referenceFileName, referenceFile);
                        
-//                     int startPos = currentSeq.getStartPos();
-//                     if(startPos > maxStartPos)      {       maxStartPos = startPos; }
-//
-//                     int endPos = currentSeq.getEndPos();
-//                     if(endPos < minEndPos)          {       minEndPos = endPos;             }
-                       referenceSeqs.push_back(currentSeq);
+                       while(referenceFile){
+                               Sequence currentSeq(referenceFile);
+                               int numAmbigs = currentSeq.getAmbigBases();
+                               if(numAmbigs > 0){      numAmbigSeqs++; }
+                               
+       //                      int startPos = currentSeq.getStartPos();
+       //                      if(startPos > maxStartPos)      {       maxStartPos = startPos; }
+       //
+       //                      int endPos = currentSeq.getEndPos();
+       //                      if(endPos < minEndPos)          {       minEndPos = endPos;             }
+                               referenceSeqs.push_back(currentSeq);
                                
-                       m->gobble(referenceFile);
+                               if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+                                       
+                               m->gobble(referenceFile);
+                       }
+                       referenceFile.close();
+                       
+                       m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();  
                }
-               referenceFile.close();
+               
                numRefs = referenceSeqs.size();
-
                
                for(int i=0;i<numRefs;i++){
                        referenceSeqs[i].padToPos(maxStartPos);
@@ -762,7 +819,7 @@ void SeqErrorCommand::getReferences(){
                
                if(numAmbigSeqs != 0){
                        m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
-               }               
+               }       
                
        }
        catch(exception& e) {
index e467df11c99d735a482328c7dbe64aa623192a95..590c554b25ec761a42ecc9593d3b6376b725f15e 100644 (file)
@@ -13,6 +13,7 @@
 #include "mothur.h"
 #include "command.hpp"
 #include "sequence.hpp"
+#include "referencedb.h"
 
 struct Compare {
        int AA, AT, AG, AC,     TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
@@ -58,6 +59,7 @@ public:
        
 private:
        bool abort;
+       ReferenceDB* rdb;
        
        struct linePair {
                unsigned long int start;
@@ -86,7 +88,7 @@ private:
 
        string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir;
        double threshold;
-       bool ignoreChimeras;
+       bool ignoreChimeras, save;
        int numRefs, processors;
        int maxLength, totalBases, totalMatches;
        //ofstream errorSummaryFile, errorSeqFile;
index 0465c720a6645c1ecd5a1d29155cbb0855748436..18bd8bb4fbb8cd7cde71c4044901ff4054e55f94 100644 (file)
@@ -210,7 +210,6 @@ int uchime_main(int argc, char *argv[])
                        if((QuerySeqCount) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(QuerySeqCount) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine();                }\r
                }\r
 \r
-\r
        Log("\n");\r
        Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
          opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r