modified sequence class to read fasta files with comments. this required modification...

author westcott <westcott>

Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)

committer westcott <westcott>

Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
author westcott <westcott>
Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
committer westcott <westcott>
Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
diff --git a/aligncommand.cpp b/aligncommand.cpp

index 57eebb0eb096ef8cc554ba4f0eaee8f470c5b6d2..55f0b821b1392f40b1f6ecd2c9414a6f8a04fff2 100644 (file)
--- a/aligncommand.cpp
+++ b/aligncommand.cpp
@@ -39,7 +39,7 @@ AlignCommand::AlignCommand(string option){
                 else {
                         
                         //valid paramters for this command
-                       string AlignArray[] =  {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors"};
+                       string AlignArray[] =  {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold"};
                         vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -90,6 +90,12 @@ AlignCommand::AlignCommand(string option){
                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
                         convert(temp, processors); 
                         
+                       temp = validParameter.validFile(parameters, "flip", false);                     if (temp == "not found"){       temp = "f";                             }
+                       flip = isTrue(temp); 
+                       
+                       temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found"){       temp = "0.80";                  }
+                       convert(temp, threshold); 
+                       
                         search = validParameter.validFile(parameters, "search", false);         if (search == "not found"){     search = "kmer";                }
                         
                         align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
@@ -127,6 +133,10 @@ void AlignCommand::help(){
                 mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
                 mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
                 mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -2.0.\n");
+               mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse compement if a sequence falls below the threshold.  The default is false.\n");
+               mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. \n");
+               mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n");
+               mothurOut("The default for the threshold parameter is 0.80, meaning at least 80% of the bases must remain or the sequence is reported as potentially reversed.\n");
                 mothurOut("The align.seqs command should be in the following format: \n");
                 mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n");
                 mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n");
@@ -162,6 +172,7 @@ int AlignCommand::execute(){
                 
                 string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
                 string reportFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align.report";
+               string accnosFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "flip.accnos";
                 
                 int numFastaSeqs = 0;
                 int start = time(NULL);
@@ -175,8 +186,17 @@ int AlignCommand::execute(){
                         
                         lines.push_back(new linePair(0, numFastaSeqs));
                 
-                       driver(lines[0], alignFileName, reportFileName);
+                       driver(lines[0], alignFileName, reportFileName, accnosFileName);
                         
+                       //delete accnos file if its blank else report to user
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+                       else { 
+                               mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                               if (!flip) {
+                                       mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                               }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                               mothurOutEndLine();
+                       }
                 }
                 else{
                         vector<int> positions;
@@ -205,11 +225,35 @@ int AlignCommand::execute(){
                                 }
                                 lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                         }
-                       createProcesses(alignFileName, reportFileName); 
+                       createProcesses(alignFileName, reportFileName, accnosFileName); 
                         
                         rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
                         rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
                         
+                       vector<string> nonBlankAccnosFiles;
+                       //delete blank accnos files generated with multiple processes
+                       for(int i=0;i<processors;i++){  
+                               if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                       nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+                               }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
+                       }
+                       
+                       //append accnos files
+                       if (nonBlankAccnosFiles.size() != 0) { 
+                               rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                               
+                               for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                                       appendAlignFiles(nonBlankAccnosFiles[h], accnosFileName);
+                                       remove(nonBlankAccnosFiles[h].c_str());
+                               }
+                               mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                               if (!flip) {
+                                       mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                               }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                               mothurOutEndLine();
+                       }
+                       
+                       //append other files
                         for(int i=1;i<processors;i++){
                                 appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
                                 remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
@@ -227,9 +271,22 @@ int AlignCommand::execute(){
                 
                 lines.push_back(new linePair(0, numFastaSeqs));
                 
-               driver(lines[0], alignFileName, reportFileName);
+               driver(lines[0], alignFileName, reportFileName, accnosFileName);
+               
+               //delete accnos file if its blank else report to user
+               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+               else { 
+                       mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                       if (!flip) {
+                                mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                       }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                       mothurOutEndLine();
+               }
+               
  #endif
                 
+               
+               
                 mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
                 mothurOutEndLine();
                 mothurOutEndLine();
@@ -244,10 +301,14 @@ int AlignCommand::execute(){
  
  //**********************************************************************************************************************
  
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
+int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName){
         try {
                 ofstream alignmentFile;
                 openOutputFile(alignFName, alignmentFile);
+               
+               ofstream accnosFile;
+               openOutputFile(accnosFName, accnosFile);
+               
                 NastReport report(reportFName);
                 
                 ifstream inFASTA;
@@ -258,36 +319,83 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                 for(int i=0;i<line->numSeqs;i++){
                         
                         Sequence* candidateSeq = new Sequence(inFASTA);
-
-                       if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
-                               alignment->resize(candidateSeq->getUnaligned().length()+1);
-                       }
-       
-                       report.setCandidate(candidateSeq);
-       
-                       Sequence temp = templateDB->findClosestSequence(candidateSeq);
-
-                       Sequence* templateSeq = &temp;
-
-                       report.setTemplate(templateSeq);
-                       report.setSearchParameters(search, templateDB->getSearchScore());
+                       int origNumBases = candidateSeq->getNumBases();
+                       string originalUnaligned = candidateSeq->getUnaligned();
+                       int numBasesNeeded = origNumBases * threshold;
+                       
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(candidateSeq->getUnaligned().length()+1);
+                               }
+                                                               
+                               Sequence temp = templateDB->findClosestSequence(candidateSeq);
+                               Sequence* templateSeq = &temp;
                                 
-                       Nast nast(alignment, candidateSeq, templateSeq);
-
-                       report.setAlignmentParameters(align, alignment);
-       
-                       report.setNastParameters(nast);
-       
-                       alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+                               float searchScore = templateDB->getSearchScore();
+                                                               
+                               Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+                               Sequence* copy;
                                 
-                       report.print();
+                               Nast* nast2;
+                               bool needToDeleteCopy = false;  //this is needed in case you have you enter the ifs below
+                                                                                               //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+                                                                                               //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+                                                                                               //so this bool tells you if you need to delete it
+                                                                                               
+                               //if there is a possibility that this sequence should be reversed
+                               if (candidateSeq->getNumBases() < numBasesNeeded) {
+                                       
+                                       string wasBetter = "";
+                                       //if the user wants you to try the reverse
+                                       if (flip) {
+                                               //get reverse compliment
+                                               copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+                                               copy->reverseComplement();
+                                               
+                                               //rerun alignment
+                                               Sequence temp2 = templateDB->findClosestSequence(copy);
+                                               Sequence* templateSeq2 = &temp2;
+                                               
+                                               searchScore = templateDB->getSearchScore();
+                                               
+                                               nast2 = new Nast(alignment, copy, templateSeq2);
                         
+                                               //check if any better
+                                               if (copy->getNumBases() > candidateSeq->getNumBases()) {
+                                                       candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
+                                                       templateSeq = templateSeq2; 
+                                                       delete nast;
+                                                       nast = nast2;
+                                                       needToDeleteCopy = true;
+                                               }else{  
+                                                       wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
+                                                       delete nast2;
+                                                       delete copy;    
+                                               }
+                                       }
+                                       
+                                       //create accnos file with names
+                                       accnosFile << candidateSeq->getName() << wasBetter << endl;
+                               }
+                               
+                               report.setCandidate(candidateSeq);
+                               report.setTemplate(templateSeq);
+                               report.setSearchParameters(search, searchScore);
+                               report.setAlignmentParameters(align, alignment);
+                               report.setNastParameters(*nast);
+       
+                               alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+                               
+                               report.print();
+                               delete nast;
+                               if (needToDeleteCopy) {   delete copy;   }
+                       }
                         delete candidateSeq;
-               
                 }
                 
                 alignmentFile.close();
                 inFASTA.close();
+               accnosFile.close();
                 
                 return 1;
         }
@@ -299,7 +407,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
  
  /**************************************************************************************************/
  
-void AlignCommand::createProcesses(string alignFileName, string reportFileName) {
+void AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName) {
         try {
  #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                 int process = 0;
@@ -313,7 +421,7 @@ void AlignCommand::createProcesses(string alignFileName, string reportFileName)
                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                 process++;
                         }else if (pid == 0){
-                               driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp");
+                               driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp");
                                 exit(0);
                         }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
                 }
@@ -354,8 +462,7 @@ void AlignCommand::appendAlignFiles(string temp, string filename) {
                 exit(1);
         }
  }
-
-/**************************************************************************************************/
+//**********************************************************************************************************************
  
  void AlignCommand::appendReportFiles(string temp, string filename) {
         try{
diff --git a/aligncommand.h b/aligncommand.h

index c3c244138a7b2457dd20ba5bedc152b88f0e0e2b..f7d59b0206c989558f0e11163d1f7abddcf9a784 100644 (file)
--- a/aligncommand.h
+++ b/aligncommand.h
@@ -36,16 +36,16 @@ private:
         AlignmentDB* templateDB;
         Alignment* alignment;
         
-       int driver(linePair*, string, string);
-       void createProcesses(string, string);
+       int driver(linePair*, string, string, string);
+       void createProcesses(string, string, string);
         void appendAlignFiles(string, string); 
         void appendReportFiles(string, string);
         
         string candidateFileName, templateFileName, distanceFileName, search, align;
-       float match, misMatch, gapOpen, gapExtend;
+       float match, misMatch, gapOpen, gapExtend, threshold;
         int processors, kmerSize;
         
-       bool abort;
+       bool abort, flip;
  };
  
  #endif
diff --git a/alignmentdb.cpp b/alignmentdb.cpp

index 6b05989d40a0184540a9397fb75efd79e49a14cc..e4509521e7383d305151170e2fcc96da1c2f3e88 100644 (file)
--- a/alignmentdb.cpp
+++ b/alignmentdb.cpp
@@ -25,14 +25,13 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
                 mothurOut("Reading in the " + fastaFileName + " template sequences...\t");      cout.flush();
                 
                 while (!fastaFile.eof()) {
-                       Sequence temp(fastaFile);
+                       Sequence temp(fastaFile);  gobble(fastaFile);
                         
-                       templateSequences.push_back(temp);
-                       
-                       //save longest base
-                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
-                       
-                       gobble(fastaFile);
+                       if (temp.getName() != "") {
+                               templateSequences.push_back(temp);
+                               //save longest base
+                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
+                       }
                 }
                 
                 numSeqs = templateSequences.size();
diff --git a/chimera.cpp b/chimera.cpp

index 8851b755a09a0b030418e7881d09caedb4753584..db153dde6e90b285f7592628789346f3bb696fb6 100644 (file)
--- a/chimera.cpp
+++ b/chimera.cpp
@@ -94,9 +94,9 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                 //read in seqs and store in vector
                 while(!in.eof()){
                         
-                       Sequence* current = new Sequence(in);
-                       container.push_back(current);
-                       gobble(in);
+                       Sequence* current = new Sequence(in);  gobble(in);
+                       
+                       if (current->getName() != "") {  container.push_back(current);  }
                 }
                 
                 in.close();
diff --git a/chimeraseqscommand.cpp b/chimeraseqscommand.cpp

index 51fc0e7b3a93265eee63092adfe5ea75cdb87f6b..bf0ffa66009ba7cdc02f46ff4d2c784e58a45a1f 100644 (file)
--- a/chimeraseqscommand.cpp
+++ b/chimeraseqscommand.cpp
@@ -156,7 +156,7 @@ void ChimeraSeqsCommand::help(){
                 mothurOut("The ksize parameter allows you to input kmersize. \n");
                 mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                 mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
-               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
                 mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                 mothurOut("Details for each method: \n"); 
                 mothurOut("\tpintail: \n"); 
@@ -168,8 +168,8 @@ void ChimeraSeqsCommand::help(){
                 mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); 
                 mothurOut("\tchimeracheck: \n"); 
                 mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); 
-               mothurOut("\tchimeraslayer: \n"); 
-               mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
+               //mothurOut("\tchimeraslayer: \n"); 
+               //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
                 mothurOut("The chimera.seqs command should be in the following format: \n");
                 mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
                 mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp

index d2f8fc748f3064bd03a525ce6d9e6bb7895d55fe..2b76e5083d1e08d2d9dc29382688081cf462b953 100644 (file)
--- a/classifyseqscommand.cpp
+++ b/classifyseqscommand.cpp
@@ -349,14 +349,15 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
                 for(int i=0;i<line->numSeqs;i++){
                         
                         Sequence* candidateSeq = new Sequence(inFASTA);
-
-                       taxonomy = classify->getTaxonomy(candidateSeq);
                         
-                       if (taxonomy != "bad seq") {
-                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                               outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
-                       }
-                                                       
+                       if (candidateSeq->getName() != "") {
+                               taxonomy = classify->getTaxonomy(candidateSeq);
+                               
+                               if (taxonomy != "bad seq") {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                                       outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               }
+                       }                               
                         delete candidateSeq;
                         
                         if((i+1) % 100 == 0){
diff --git a/commandfactory.cpp b/commandfactory.cpp

index 635cff032a645b7e69c168c7b461eaca71bb364d..a3858639a1f78d633e9c1537054978bac4e4d499 100644 (file)
--- a/commandfactory.cpp
+++ b/commandfactory.cpp
@@ -181,7 +181,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                 else if(commandName == "merge.files")                   {       command = new MergeFileCommand(optionString);                   }
                 else if(commandName == "system")                                {       command = new SystemCommand(optionString);                              }
                 else if(commandName == "align.check")                   {       command = new AlignCheckCommand(optionString);                  }
-               else if(commandName == "get.sharedseqs")                        {       command = new GetSharedOTUCommand(optionString);                }
+               else if(commandName == "get.sharedseqs")                {       command = new GetSharedOTUCommand(optionString);                }
                 else if(commandName == "get.otulist")                   {       command = new GetListCountCommand(optionString);                }
                 else if(commandName == "hcluster")                              {       command = new HClusterCommand(optionString);                    }
                 else if(commandName == "classify.seqs")                 {       command = new ClassifySeqsCommand(optionString);                }
@@ -195,7 +195,22 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                 exit(1);
         }
  }
+/***********************************************************/
+//This function is used to interrupt a command
+Command* CommandFactory::getCommand(){
+       try {
+               delete command;   //delete the old command
  
+               string s = "";
+           command = new NoCommand(s);
+       
+               return command;
+       }
+       catch(exception& e) {
+               errorOut(e, "CommandFactory", "getCommand");
+               exit(1);
+       }
+}
  /***********************************************************************/
  bool CommandFactory::isValidCommand(string command) {
         try {   
diff --git a/commandfactory.hpp b/commandfactory.hpp

index 0cede38b05904dd9252f8987c8782f32f04cc297..55f70eaabf1440488c46679472dd86c36693b088 100644 (file)
--- a/commandfactory.hpp
+++ b/commandfactory.hpp
@@ -19,6 +19,7 @@ public:
         CommandFactory();
         ~CommandFactory();
         Command* getCommand(string, string);
+       Command* getCommand();
         bool isValidCommand(string);
         void printCommands(ostream&);
  
diff --git a/engine.cpp b/engine.cpp

index 0a0f5a0ce7f0f4f0a120509e2c3d5c0e51471ecb..371bda8fb83f2b8ceb4793c49f067ed5987f2094 100644 (file)
--- a/engine.cpp
+++ b/engine.cpp
@@ -20,14 +20,11 @@ InteractEngine::InteractEngine(string path){
  
         globaldata = GlobalData::getInstance();
         globaldata->argv = path;
-       
-       
  }
  
  /***********************************************************************/
  
-InteractEngine::~InteractEngine(){
-       }
+InteractEngine::~InteractEngine(){}
  
  /***********************************************************************/
  //This function allows the user to input commands one line at a time until they quit.
@@ -44,9 +41,47 @@ bool InteractEngine::getInput(){
                         
                         mothurOutEndLine();
                         mothurOut("mothur > ");
+                       
+                       //get input char by char so you can check for use of arrow keys
+                       //if (_kbhit()){
+                       //      _getch(); // edit : if you want to check the arrow-keys you must call getch twice because special-keys have two values
+                       //      return _getch();
+                       //}
+                       //return 0; // if no key is pressed
+                       //setbuf(stdin, NULL); //no buffering
+/*if(ch==0)
+{
+ch=getch();
+if(ch==72) cout<<"up";
+else if(ch==75) cout<<"left";
+else if(ch==77) cout<<"right";
+else if(ch==80) cout<<"down";
+cout<<endl;
+}
+else break;
+}
+delay(2000);
+return 0;
+}*/
+                       
+                       //int letter = 0;
+                       //while ((letter != 10) && (letter != 13)) {
+                       //      letter = getch();
+                               
+                       //      cout << "char code = " << letter << endl;
+                               
+                       //      input += char(letter);
+                       //}
+               //      input = input.substr(0, input.length()-1); //cut off newline char
+               
+       //cout << input << endl;                
+                       
                         getline(cin, input);
                         if (cin.eof()) { input = "quit()"; }
                         
+                       //save command
+                       //previousInputs.push_back(input);
+                       
                         mothurOutJustToLog(input);
                         mothurOutEndLine();
                         
@@ -55,13 +90,13 @@ bool InteractEngine::getInput(){
                         
                         CommandOptionParser parser(input);
                         commandName = parser.getCommandString();
+       //cout << " command = " << commandName << endl;
                         options = parser.getOptionString();
                         
                         if (commandName != "") {
                         
                                 //executes valid command
-                               CommandFactory cFactory;
-                               Command* command = cFactory.getCommand(commandName, options);
+                               Command* command = cFactory->getCommand(commandName, options);
                                 quitCommandCalled = command->execute();
                                 
                         }else {
@@ -76,7 +111,17 @@ bool InteractEngine::getInput(){
                 exit(1);
         }
  }
-
+/***********************************************************************/
+void Engine::terminateCommand(int dummy)  {
+       try {
+               mothurOut("Stopping command."); mothurOutEndLine();
+               cFactory->getCommand();  //terminates command
+       }
+       catch(exception& e) {
+               errorOut(e, "InteractEngine", "terminateCommand");
+               exit(1);
+       }
+}
  /***********************************************************************/
  //This function opens the batchfile to be used by BatchEngine::getInput.
  BatchEngine::BatchEngine(string path, string batchFileName){
@@ -95,8 +140,7 @@ BatchEngine::BatchEngine(string path, string batchFileName){
  
  /***********************************************************************/
  
-BatchEngine::~BatchEngine(){
-       }
+BatchEngine::~BatchEngine(){   }
  
  /***********************************************************************/
  //This Function allows the user to run a batchfile containing several commands on Dotur
@@ -138,8 +182,7 @@ bool BatchEngine::getInput(){
                                 if (commandName != "") {
  
                                         //executes valid command
-                                       CommandFactory cFactory;
-                                       Command* command = cFactory.getCommand(commandName, options);
+                                       Command* command = cFactory->getCommand(commandName, options);
                                         quitCommandCalled = command->execute();
                                 }else {         
                                         mothurOut("Invalid."); 
@@ -172,8 +215,6 @@ ScriptEngine::ScriptEngine(string path, string commandString){
  
                 globaldata->argv = path;
                 
-               
-       
         }
         catch(exception& e) {
                 errorOut(e, "ScriptEngine", "ScriptEngine");
@@ -183,8 +224,7 @@ ScriptEngine::ScriptEngine(string path, string commandString){
  
  /***********************************************************************/
  
-ScriptEngine::~ScriptEngine(){
-       }
+ScriptEngine::~ScriptEngine(){         }
  
  /***********************************************************************/
  //This Function allows the user to run a batchfile containing several commands on mothur
@@ -221,8 +261,7 @@ bool ScriptEngine::getInput(){
                         if (commandName != "") {
  
                                 //executes valid command
-                               CommandFactory cFactory;
-                               Command* command = cFactory.getCommand(commandName, options);
+                               Command* command = cFactory->getCommand(commandName, options);
                                 quitCommandCalled = command->execute();
                         }else {         
                                 mothurOut("Invalid."); 
diff --git a/engine.hpp b/engine.hpp

index 603ff0a1c65fa528439b7a8f456991ae600c26ef..d4ec3b65a5cdd428c44967ed66ea454d7bc5b2f8 100644 (file)
--- a/engine.hpp
+++ b/engine.hpp
@@ -22,13 +22,16 @@ class GlobalData;
  
  class Engine {
  public:
-       virtual ~Engine(){};
+       Engine() {  cFactory = new CommandFactory();    }
+       virtual ~Engine(){  delete cFactory;  }
         virtual bool getInput() = 0;
  //     string getCommand()                     {       return command;         }
         vector<string> getOptions() {   return options;         }
+       virtual void terminateCommand(int);
  protected:
  //     string command;
         vector<string> options;
+       CommandFactory* cFactory;
  };
  
  
@@ -54,6 +57,8 @@ public:
         virtual bool getInput();
  private:
         GlobalData* globaldata;
+       vector<string> previousInputs; //this is used to make the arrow keys work
+       
  };
  
  
diff --git a/fastamap.cpp b/fastamap.cpp

index 57754667da593265618aa0d9e5c046e18b2b7203..f5ac122112a633de7768b251568edda33623a81c 100644 (file)
--- a/fastamap.cpp
+++ b/fastamap.cpp
@@ -24,20 +24,21 @@ void FastaMap::readFastaFile(string inFileName) {
                         Sequence currSeq(in);
                         name = currSeq.getName();
                         
-                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
-                       else                                            {       sequence = currSeq.getUnaligned();      }
-                       
-                       seqmap[name] = sequence;  
-                       map<string,group>::iterator it = data.find(sequence);
-                       if (it == data.end()) {         //it's unique.
-                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-//                             data[sequence].groupnumber = 1;
-                               data[sequence].names = name;
-                       }else { // its a duplicate.
-                               data[sequence].names += "," + name;
-//                             data[sequence].groupnumber++;
-                       }       
-                       
+                       if (name != "") {
+                               if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
+                               else                                            {       sequence = currSeq.getUnaligned();      }
+                               
+                               seqmap[name] = sequence;  
+                               map<string,group>::iterator it = data.find(sequence);
+                               if (it == data.end()) {         //it's unique.
+                                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                                       //                              data[sequence].groupnumber = 1;
+                                       data[sequence].names = name;
+                               }else { // its a duplicate.
+                                       data[sequence].names += "," + name;
+                                       //                              data[sequence].groupnumber++;
+                               }       
+                       }
                         gobble(in);
                 }
                 in.close();             
@@ -71,20 +72,21 @@ void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prin
                 Sequence currSeq(inFASTA);
                 name = currSeq.getName();
                 
-               if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
-               else                                            {       sequence = currSeq.getUnaligned();      }
-               
-               seqmap[name] = sequence;  
-               map<string,group>::iterator it = data.find(sequence);
-               if (it == data.end()) {         //it's unique.
-                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-//                     data[sequence].groupnumber = 1;
-                       data[sequence].names = oldNameMap[name];
-               }else { // its a duplicate.
-                       data[sequence].names += "," + oldNameMap[name];
-//                     data[sequence].groupnumber++;
-               }       
-               
+               if (name != "") {
+                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
+                       else                                            {       sequence = currSeq.getUnaligned();      }
+                       
+                       seqmap[name] = sequence;  
+                       map<string,group>::iterator it = data.find(sequence);
+                       if (it == data.end()) {         //it's unique.
+                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                               //                      data[sequence].groupnumber = 1;
+                               data[sequence].names = oldNameMap[name];
+                       }else { // its a duplicate.
+                               data[sequence].names += "," + oldNameMap[name];
+                               //                      data[sequence].groupnumber++;
+                       }       
+               }
                 gobble(inFASTA);
         }
         
diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp

index 68450fb15a7b140803aab2d54d8c5bc84e7ce688..a689704131d32ae7e8a244babc672695cb432384 100644 (file)
--- a/filterseqscommand.cpp
+++ b/filterseqscommand.cpp
@@ -121,10 +121,12 @@ int FilterSeqsCommand::execute() {
                 if(trump != '*' || isTrue(vertical) || soft != 0){
                         while(!inFASTA.eof()){  //read through and create the filter...
                                 Sequence seq(inFASTA);
-                               if(trump != '*'){       F.doTrump(seq); }
-                               if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
-                               numSeqs++;
-                               cout.flush();
+                               if (seq.getName() != "") {
+                                       if(trump != '*'){       F.doTrump(seq); }
+                                       if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
+                                       numSeqs++;
+                                       cout.flush();
+                               }
                         }
                 
                 }
@@ -152,17 +154,19 @@ int FilterSeqsCommand::execute() {
                 numSeqs = 0;
                 while(!inFasta2.eof()){
                         Sequence seq(inFasta2);
-                       string align = seq.getAligned();
-                       string filterSeq = "";
-       
-                       for(int j=0;j<alignmentLength;j++){
-                               if(filter[j] == '1'){
-                                       filterSeq += align[j];
+                       if (seq.getName() != "") {
+                               string align = seq.getAligned();
+                               string filterSeq = "";
+                               
+                               for(int j=0;j<alignmentLength;j++){
+                                       if(filter[j] == '1'){
+                                               filterSeq += align[j];
+                                       }
                                 }
+                               
+                               outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
+                               numSeqs++;
                         }
-
-                       outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
-                       numSeqs++;
                         gobble(inFasta2);
                 }
                 outFASTA.close();
diff --git a/getseqscommand.cpp b/getseqscommand.cpp

index 04b254599f46ef00081a3f153c0e097fea665cf4..315417a30586d290b2a692e04b2e0afacb1e6fb3 100644 (file)
--- a/getseqscommand.cpp
+++ b/getseqscommand.cpp
@@ -125,15 +125,16 @@ void GetSeqsCommand::readFasta(){
                         Sequence currSeq(in);
                         name = currSeq.getName();
                         
-                       //if this name is in the accnos file
-                       if (names.count(name) == 1) {
-                               wroteSomething = true;
-                               
-                               currSeq.printSequence(out);
-                               
-                               names.erase(name);
+                       if (name != "") {
+                               //if this name is in the accnos file
+                               if (names.count(name) == 1) {
+                                       wroteSomething = true;
+                                       
+                                       currSeq.printSequence(out);
+                                       
+                                       names.erase(name);
+                               }
                         }
-                       
                         gobble(in);
                 }
                 in.close();     
diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp

index 76e5725d850e17d52bff0c72abfd2d83669778ee..742377f14a12cb55f306cf334936a9ca1f1e12f3 100644 (file)
--- a/getsharedotucommand.cpp
+++ b/getsharedotucommand.cpp
@@ -133,7 +133,7 @@ int GetSharedOTUCommand::execute(){
                         
                         while(!inFasta.eof()) {
                                 Sequence seq(inFasta);
-                               seqs.push_back(seq);
+                               if (seq.getName() != "") {  seqs.push_back(seq);   }
                         }
                         inFasta.close();
                 }
diff --git a/listseqscommand.cpp b/listseqscommand.cpp

index efc88e98232497383d8d9234ac2d53641d8d3d6e..3087b48fd9669d13b5460f032f911b8f32049dce 100644 (file)
--- a/listseqscommand.cpp
+++ b/listseqscommand.cpp
@@ -125,7 +125,7 @@ void ListSeqsCommand::readFasta(){
                         Sequence currSeq(in);
                         name = currSeq.getName();
                         
-                       names.push_back(name);
+                       if (name != "") {  names.push_back(name);  }
                         
                         gobble(in);
                 }
diff --git a/mothur.cpp b/mothur.cpp

index 1c0c98f9f1caf4cec5de25f4c66ce7f9a3a3af8f..4e064b142d896cb1060d98c9574e42a4355a9efb 100644 (file)
--- a/mothur.cpp
+++ b/mothur.cpp
@@ -77,7 +77,7 @@ int main(int argc, char *argv[]){
                                 
                 //srand(54321);
                 srand( (unsigned)time( NULL ) );
-
+               
                 Engine* mothur;
                 bool bail = 0;
                 string input;
@@ -92,9 +92,16 @@ int main(int argc, char *argv[]){
                         }
                 }
                 else{
-                       mothur = new InteractEngine(argv[0]);           
+                       mothur = new InteractEngine(argv[0]);   
                 }
+               
+               //used to intercept the terminate signal, so instead of terminating mothur it will end a command
+               //void (*prev_fn)(int);
+               //prev_fn = signal(SIGTERM, mothur->terminateCommand(0));
+               
+               //if (prev_fn==SIG_IGN) signal (SIGTERM,SIG_IGN);
  
+               
                 while(bail == 0)                {       bail = mothur->getInput();                      }
         
                 delete mothur;
diff --git a/mothur.h b/mothur.h

index ba88f71fabffced9509deed545cecde7228b2b63..5c1880d569eeb7f37089c1a2d418f8fadf7b4ab7 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -21,6 +21,7 @@
  #include <iomanip>
  #include <fstream>
  #include <sstream>
+#include <signal.h>
  
  //exception
  #include <stdexcept>
@@ -283,9 +284,6 @@ inline void errorOut(exception& e, string object, string function) {
         
  }
  
-
-
-
  /***********************************************************************/
  
  inline bool isTrue(string f){
@@ -425,7 +423,21 @@ inline string getExtension(string longName){
         
         return extension;
  }
-
+/***********************************************************************/
+inline bool isBlank(string fileName){
+       
+       ifstream fileHandle;
+       fileHandle.open(fileName.c_str());
+       if(!fileHandle) {
+               mothurOut("Error: Could not open " + fileName);  mothurOutEndLine();
+               return false;
+       }else {
+               //check for blank file
+               gobble(fileHandle);
+               if (fileHandle.eof()) { fileHandle.close(); return true;  }
+       }
+       return false;
+}
  /***********************************************************************/
  
  inline int openInputFile(string fileName, ifstream& fileHandle){
diff --git a/nast.cpp b/nast.cpp

index 4848eb20be0b5cfdb43715b5ba1589d253ba4571..6a4ae3db2f679ee4ccadf2b0ff3270f5da4f15aa 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -32,7 +32,6 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
                 errorOut(e, "Nast", "Nast");
                 exit(1);
         }
-
  }
  
  /**************************************************************************************************/
@@ -42,10 +41,10 @@ void Nast::pairwiseAlignSeqs(){     //      Here we call one of the pairwise alignment me
         try {
  
                 alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-               
+       
                 string candAln = alignment->getSeqAAln();
                 string tempAln = alignment->getSeqBAln();
-               
+       
                 if(candAln == ""){
  
                         candidateSeq->setPairwise("");
@@ -78,7 +77,6 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
  
                 candidateSeq->setPairwise(candAln);                                     //      set the pairwise sequences in the Sequence objects for
                 templateSeq->setPairwise(tempAln);                                      //      the candidate and template sequences
-
         }
         catch(exception& e) {
                 errorOut(e, "Nast", "pairwiseAlignSeqs");
diff --git a/nast.hpp b/nast.hpp

index 4cf1b85b9524cd69f8c824143087bef79e0efb67..bc2467f13c3d65fe41309ffb6de13ffdbaafd7c4 100644 (file)
--- a/nast.hpp
+++ b/nast.hpp
@@ -29,6 +29,7 @@ class Nast {
         
  public:
         Nast(Alignment*, Sequence*, Sequence*);
+       ~Nast(){};
         float getSimilarityScore();
         int getMaxInsertLength();
         
diff --git a/overlap.cpp b/overlap.cpp

index e30b51d243ac776fc36720321e53e6b72702237e..40b49f88bfe4d5cda9f123690253f87cda9ce61f 100644 (file)
--- a/overlap.cpp
+++ b/overlap.cpp
@@ -60,7 +60,7 @@ void Overlap::setOverlap(vector<vector<AlignmentCell> >& alignment, const int nA
         
         int rowIndex = maxRow(alignment, band);         //      get the index for the row with the highest right hand side score
         int colIndex = maxColumn(alignment, band);      //      get the index for the column with the highest bottom row score
-       
+               
         int row = lB-1;
         int column = lA-1;
         
diff --git a/removeseqscommand.cpp b/removeseqscommand.cpp

index 1b9edb0e98e20f7752fb9b8aab3584c1c05bccc0..28f8cf7a6d890b0c688c388b3a2c0e8c8438ba27 100644 (file)
--- a/removeseqscommand.cpp
+++ b/removeseqscommand.cpp
@@ -125,13 +125,14 @@ void RemoveSeqsCommand::readFasta(){
                         Sequence currSeq(in);
                         name = currSeq.getName();
                         
-                       //if this name is in the accnos file
-                       if (names.count(name) == 0) {
-                               wroteSomething = true;
-                               
-                               currSeq.printSequence(out);
-                       }else {         names.erase(name);              }
-                       
+                       if (name != "") {
+                               //if this name is in the accnos file
+                               if (names.count(name) == 0) {
+                                       wroteSomething = true;
+                                       
+                                       currSeq.printSequence(out);
+                               }else {         names.erase(name);              }
+                       }
                         gobble(in);
                 }
                 in.close();     
diff --git a/reversecommand.cpp b/reversecommand.cpp

index 7a87a99a9720f3f5c8bc04e77bac39d5eabf5ea0..37e95685430184760cd0919c18a1cfeea2ec4c42 100644 (file)
--- a/reversecommand.cpp
+++ b/reversecommand.cpp
@@ -82,9 +82,11 @@ int ReverseSeqsCommand::execute(){
                 openOutputFile(reverseFile, outFASTA);
                 
                 while(!inFASTA.eof()){
-                       Sequence currSeq(inFASTA);
-                       currSeq.reverseComplement();
-                       currSeq.printSequence(outFASTA);
+                       Sequence currSeq(inFASTA);  gobble(inFASTA);
+                       if (currSeq.getName() != "") {
+                               currSeq.reverseComplement();
+                               currSeq.printSequence(outFASTA);
+                       }
                 }
                 inFASTA.close();
                 outFASTA.close();
diff --git a/screenseqscommand.cpp b/screenseqscommand.cpp

index d5e917c204f1c2757cae52eb5dc8227929851ed1..9225e992bab6e17d0c3228cc36ced9a2505933e7 100644 (file)
--- a/screenseqscommand.cpp
+++ b/screenseqscommand.cpp
@@ -131,20 +131,22 @@ int ScreenSeqsCommand::execute(){
                 
                 while(!inFASTA.eof()){
                         Sequence currSeq(inFASTA);
-                       bool goodSeq = 1;               //      innocent until proven guilty
-                       if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                       if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                       if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
-                       
-                       if(goodSeq == 1){
-                               currSeq.printSequence(goodSeqOut);      
-                       }
-                       else{
-                               currSeq.printSequence(badSeqOut);       
-                               badSeqNames.insert(currSeq.getName());
+                       if (currSeq.getName() != "") {
+                               bool goodSeq = 1;               //      innocent until proven guilty
+                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
+                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
+                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                               
+                               if(goodSeq == 1){
+                                       currSeq.printSequence(goodSeqOut);      
+                               }
+                               else{
+                                       currSeq.printSequence(badSeqOut);       
+                                       badSeqNames.insert(currSeq.getName());
+                               }
                         }
                         gobble(inFASTA);
                 }       
diff --git a/secondarystructurecommand.cpp b/secondarystructurecommand.cpp

index 7156082f8b1b43228a62c6cf93abec9f2d900109..c2c104e71b1f535cd2a818afe268e5d7e1ed4c24 100644 (file)
--- a/secondarystructurecommand.cpp
+++ b/secondarystructurecommand.cpp
@@ -91,11 +91,13 @@ int AlignCheckCommand::execute(){
                 
                 while(!in.eof()){
                         
-                       Sequence seq(in);
-                       statData data = getStats(seq.getAligned());
-                       
-                       out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
-                       out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+                       Sequence seq(in);  gobble(in);
+                       if (seq.getName() != "") {
+                               statData data = getStats(seq.getAligned());
+                               
+                               out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
+                               out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+                       }
                 }
  
                 in.close();
diff --git a/seqsummarycommand.cpp b/seqsummarycommand.cpp

index 4eafaf8ed15e7fde8c60f9c3d80a376f1d796f3d..9ab27f9cbd439fe55ed1856c6940f0e42e6ee7e0 100644 (file)
--- a/seqsummarycommand.cpp
+++ b/seqsummarycommand.cpp
@@ -91,18 +91,20 @@ int SeqSummaryCommand::execute(){
  
                 while(!inFASTA.eof()){
                         Sequence current(inFASTA);
-                       startPosition.push_back(current.getStartPos());
-                       endPosition.push_back(current.getEndPos());
-                       seqLength.push_back(current.getNumBases());
-                       ambigBases.push_back(current.getAmbigBases());
-                       longHomoPolymer.push_back(current.getLongHomoPolymer());
-
-                       outSummary << current.getName() << '\t';
-                       outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
-                       outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
-                       outSummary << current.getLongHomoPolymer() << endl;
-                       
-                       numSeqs++;
+                       if (current.getName() != "") {
+                               startPosition.push_back(current.getStartPos());
+                               endPosition.push_back(current.getEndPos());
+                               seqLength.push_back(current.getNumBases());
+                               ambigBases.push_back(current.getAmbigBases());
+                               longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               
+                               outSummary << current.getName() << '\t';
+                               outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+                               outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+                               outSummary << current.getLongHomoPolymer() << endl;
+                               
+                               numSeqs++;
+                       }
                         gobble(inFASTA);
                 }
                 inFASTA.close();
diff --git a/sequence.cpp b/sequence.cpp

index b2c2b53d0b4078e6e296312662dcdfe0ffe5fbec..752e081e367fc6d572b5622e0b4eb6003a28f9bb 100644 (file)
--- a/sequence.cpp
+++ b/sequence.cpp
@@ -28,35 +28,85 @@ Sequence::Sequence(string newName, string sequence) {
         
  }
  //********************************************************************************************************************
-
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
  Sequence::Sequence(ifstream& fastaFile){
  
         initialize();
         fastaFile >> name;
         name = name.substr(1);
-       
-       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-
-       char letter;
         string sequence;
         
-       while(fastaFile){
-               letter= fastaFile.get();
-               if(letter == '>'){
-                       fastaFile.putback(letter);
+       //read comments
+       while ((name[0] == '#') && fastaFile) { 
+           while (!fastaFile.eof())    {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+               sequence = getCommentString(fastaFile);
+               
+               if (fastaFile) {  
+                       fastaFile >> name;  
+                       name = name.substr(1);  
+               }else { 
+                       name = "";
                         break;
                 }
-               else if(isprint(letter)){
-                       letter = toupper(letter);
-                       if(letter == 'U'){letter = 'T';}
-                       sequence += letter;
-               }
         }
-
+       
+       //read real sequence
+       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+               
+       sequence = getSequenceString(fastaFile);                
+       
         setAligned(sequence);   
         //setUnaligned removes any gap characters for us                                                
         setUnaligned(sequence);                                                         
  }
+//********************************************************************************************************************
+string Sequence::getSequenceString(ifstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";   
+               
+               while(fastaFile){
+                       letter= fastaFile.get();
+                       if(letter == '>'){
+                               fastaFile.putback(letter);
+                               break;
+                       }
+                       else if(isprint(letter)){
+                               letter = toupper(letter);
+                               if(letter == 'U'){letter = 'T';}
+                               sequence += letter;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               errorOut(e, "Sequence", "getSequenceString");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(ifstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";
+               
+               while(fastaFile){
+                       letter=fastaFile.get();
+                       if((letter == '\r') || (letter == '\n')){  
+                               gobble(fastaFile);  //in case its a \r\n situation
+                               break;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               errorOut(e, "Sequence", "getCommentString");
+               exit(1);
+       }
+}
  
  //********************************************************************************************************************
  
@@ -109,6 +159,7 @@ void Sequence::setAligned(string sequence){
         //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
         aligned = sequence;
         alignmentLength = aligned.length();
+       setUnaligned(sequence); 
  
         if(aligned[0] == '-'){
                 for(int i=0;i<alignmentLength;i++){
diff --git a/sequence.hpp b/sequence.hpp

index ac9384bf156bf471cc35b3fe8d57f6ecf777e857..f6d7c869853a17808d8d894466f3d473b18c13d8 100644 (file)
--- a/sequence.hpp
+++ b/sequence.hpp
@@ -48,6 +48,8 @@ public:
         
  private:
         void initialize();
+       string getSequenceString(ifstream&);
+       string getCommentString(ifstream&);
         string name;
         string unaligned;
         string aligned;
diff --git a/sequencedb.cpp b/sequencedb.cpp

index 32fc14644698ddedb4248aba10ea3ed3415bf7be..b3da429fc57c9da7081640876ee9fbedda808e25 100644 (file)
--- a/sequencedb.cpp
+++ b/sequencedb.cpp
@@ -35,7 +35,8 @@ SequenceDB::SequenceDB(ifstream& filehandle) {
                 while (!filehandle.eof()) {
                         //input sequence info into sequencedb
                         Sequence newSequence(filehandle);
-                       data.push_back(newSequence);
+                       
+                       if (newSequence.getName() != "") {   data.push_back(newSequence);  }
                         
                         //takes care of white space
                         gobble(filehandle);
diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp

index 5ca70a99cb4b600780b425ff7f29b909794cc362..ca7a927134f1f904401ab017d456ff1f6859a569 100644 (file)
--- a/trimseqscommand.cpp
+++ b/trimseqscommand.cpp
@@ -173,61 +173,63 @@ int TrimSeqsCommand::execute(){
                 while(!inFASTA.eof()){
                         Sequence currSeq(inFASTA);
                         string origSeq = currSeq.getUnaligned();
-                       int group;
-                       string trashCode = "";
-                       
-                       if(qFileName != ""){
-                               if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
-                               else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
-                               if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
-                                       success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+                       if (origSeq != "") {
+                               int group;
+                               string trashCode = "";
+                               
+                               if(qFileName != ""){
+                                       if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
+                                       else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
+                                       if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
+                                               success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+                                       }
+                                       if(!success)                    {       trashCode += 'q';                                                               }
                                 }
-                               if(!success)                    {       trashCode += 'q';                                                               }
-                       }
-                       if(barcodes.size() != 0){
-       
-                               success = stripBarcode(currSeq, group);
-                               if(!success){   trashCode += 'b';       }
-                       }
-                       if(numFPrimers != 0){
-                               success = stripForward(currSeq);
-                               if(!success){   trashCode += 'f';       }
-                       }
-                       if(numRPrimers != 0){
-                               success = stripReverse(currSeq);
-                               if(!success){   trashCode += 'r';       }
-                       }
-                       if(minLength > 0 || maxLength > 0){
-                               success = cullLength(currSeq);
-                       if ((currSeq.getUnaligned().length() > 300) && (success)) {  cout << "too long " << currSeq.getUnaligned().length() << endl;  }
-                               if(!success){   trashCode += 'l'; }
-                       }
-                       if(maxHomoP > 0){
-                               success = cullHomoP(currSeq);
-                               if(!success){   trashCode += 'h';       }
-                       }
-                       if(maxAmbig != -1){
-                               success = cullAmbigs(currSeq);
-                               if(!success){   trashCode += 'n';       }
-                       }
-                       
-                       if(flip){       currSeq.reverseComplement();    }               // should go last                       
-                       
-                       if(trashCode.length() == 0){
-                               currSeq.setAligned(currSeq.getUnaligned());  //this is because of a modification we made to the sequence class to fix a bug.  all seqs have an aligned version, which is the version that gets printed.
-                               currSeq.printSequence(outFASTA);
                                 if(barcodes.size() != 0){
-                                       outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
                                         
-                                       if(allFiles){
-                                               currSeq.printSequence(*fastaFileNames[group]);                                  
+                                       success = stripBarcode(currSeq, group);
+                                       if(!success){   trashCode += 'b';       }
+                               }
+                               if(numFPrimers != 0){
+                                       success = stripForward(currSeq);
+                                       if(!success){   trashCode += 'f';       }
+                               }
+                               if(numRPrimers != 0){
+                                       success = stripReverse(currSeq);
+                                       if(!success){   trashCode += 'r';       }
+                               }
+                               if(minLength > 0 || maxLength > 0){
+                                       success = cullLength(currSeq);
+                                       if ((currSeq.getUnaligned().length() > 300) && (success)) {  cout << "too long " << currSeq.getUnaligned().length() << endl;  }
+                                       if(!success){   trashCode += 'l'; }
+                               }
+                               if(maxHomoP > 0){
+                                       success = cullHomoP(currSeq);
+                                       if(!success){   trashCode += 'h';       }
+                               }
+                               if(maxAmbig != -1){
+                                       success = cullAmbigs(currSeq);
+                                       if(!success){   trashCode += 'n';       }
+                               }
+                               
+                               if(flip){       currSeq.reverseComplement();    }               // should go last                       
+                               
+                               if(trashCode.length() == 0){
+                                       currSeq.setAligned(currSeq.getUnaligned());  //this is because of a modification we made to the sequence class to fix a bug.  all seqs have an aligned version, which is the version that gets printed.
+                                       currSeq.printSequence(outFASTA);
+                                       if(barcodes.size() != 0){
+                                               outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+                                               
+                                               if(allFiles){
+                                                       currSeq.printSequence(*fastaFileNames[group]);                                  
+                                               }
                                         }
                                 }
-                       }
-                       else{
-                               currSeq.setName(currSeq.getName() + '|' + trashCode);
-                               currSeq.setUnaligned(origSeq);
-                               currSeq.printSequence(scrapFASTA);
+                               else{
+                                       currSeq.setName(currSeq.getName() + '|' + trashCode);
+                                       currSeq.setUnaligned(origSeq);
+                                       currSeq.printSequence(scrapFASTA);
+                               }
                         }
                         gobble(inFASTA);
                 }
author	westcott <westcott>
	Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
committer	westcott <westcott>
	Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
aligncommand.cpp		patch \| blob \| history
aligncommand.h		patch \| blob \| history
alignmentdb.cpp		patch \| blob \| history
chimera.cpp		patch \| blob \| history
chimeraseqscommand.cpp		patch \| blob \| history
classifyseqscommand.cpp		patch \| blob \| history
commandfactory.cpp		patch \| blob \| history
commandfactory.hpp		patch \| blob \| history
engine.cpp		patch \| blob \| history
engine.hpp		patch \| blob \| history
fastamap.cpp		patch \| blob \| history
filterseqscommand.cpp		patch \| blob \| history
getseqscommand.cpp		patch \| blob \| history
getsharedotucommand.cpp		patch \| blob \| history
listseqscommand.cpp		patch \| blob \| history
mothur.cpp		patch \| blob \| history
mothur.h		patch \| blob \| history
nast.cpp		patch \| blob \| history
nast.hpp		patch \| blob \| history
overlap.cpp		patch \| blob \| history
removeseqscommand.cpp		patch \| blob \| history
reversecommand.cpp		patch \| blob \| history
screenseqscommand.cpp		patch \| blob \| history
secondarystructurecommand.cpp		patch \| blob \| history
seqsummarycommand.cpp		patch \| blob \| history
sequence.cpp		patch \| blob \| history
sequence.hpp		patch \| blob \| history
sequencedb.cpp		patch \| blob \| history
trimseqscommand.cpp		patch \| blob \| history