]> git.donarmstrong.com Git - mothur.git/commitdiff
modified sequence class to read fasta files with comments. this required modification...
authorwestcott <westcott>
Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
committerwestcott <westcott>
Wed, 2 Dec 2009 20:11:35 +0000 (20:11 +0000)
29 files changed:
aligncommand.cpp
aligncommand.h
alignmentdb.cpp
chimera.cpp
chimeraseqscommand.cpp
classifyseqscommand.cpp
commandfactory.cpp
commandfactory.hpp
engine.cpp
engine.hpp
fastamap.cpp
filterseqscommand.cpp
getseqscommand.cpp
getsharedotucommand.cpp
listseqscommand.cpp
mothur.cpp
mothur.h
nast.cpp
nast.hpp
overlap.cpp
removeseqscommand.cpp
reversecommand.cpp
screenseqscommand.cpp
secondarystructurecommand.cpp
seqsummarycommand.cpp
sequence.cpp
sequence.hpp
sequencedb.cpp
trimseqscommand.cpp

index 57eebb0eb096ef8cc554ba4f0eaee8f470c5b6d2..55f0b821b1392f40b1f6ecd2c9414a6f8a04fff2 100644 (file)
@@ -39,7 +39,7 @@ AlignCommand::AlignCommand(string option){
                else {
                        
                        //valid paramters for this command
-                       string AlignArray[] =  {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors"};
+                       string AlignArray[] =  {"template","candidate","search","ksize","align","match","mismatch","gapopen","gapextend", "processors","flip","threshold"};
                        vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -90,6 +90,12 @@ AlignCommand::AlignCommand(string option){
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
                        convert(temp, processors); 
                        
+                       temp = validParameter.validFile(parameters, "flip", false);                     if (temp == "not found"){       temp = "f";                             }
+                       flip = isTrue(temp); 
+                       
+                       temp = validParameter.validFile(parameters, "threshold", false);        if (temp == "not found"){       temp = "0.80";                  }
+                       convert(temp, threshold); 
+                       
                        search = validParameter.validFile(parameters, "search", false);         if (search == "not found"){     search = "kmer";                }
                        
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
@@ -127,6 +133,10 @@ void AlignCommand::help(){
                mothurOut("The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n");
                mothurOut("The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -1.0.\n");
                mothurOut("The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -2.0.\n");
+               mothurOut("The flip parameter is used to specify whether or not you want mothur to try the reverse compement if a sequence falls below the threshold.  The default is false.\n");
+               mothurOut("The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. \n");
+               mothurOut("If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.\n");
+               mothurOut("The default for the threshold parameter is 0.80, meaning at least 80% of the bases must remain or the sequence is reported as potentially reversed.\n");
                mothurOut("The align.seqs command should be in the following format: \n");
                mothurOut("align.seqs(template=yourTemplateFile, candidate=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty) \n");
                mothurOut("Example align.seqs(candidate=candidate.fasta, template=core.filtered, align=kmer, search=gotoh, ksize=8, match=2.0, mismatch=3.0, gapopen=-2.0, gapextend=-1.0)\n");
@@ -162,6 +172,7 @@ int AlignCommand::execute(){
                
                string alignFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align";
                string reportFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "align.report";
+               string accnosFileName = candidateFileName.substr(0,candidateFileName.find_last_of(".")+1) + "flip.accnos";
                
                int numFastaSeqs = 0;
                int start = time(NULL);
@@ -175,8 +186,17 @@ int AlignCommand::execute(){
                        
                        lines.push_back(new linePair(0, numFastaSeqs));
                
-                       driver(lines[0], alignFileName, reportFileName);
+                       driver(lines[0], alignFileName, reportFileName, accnosFileName);
                        
+                       //delete accnos file if its blank else report to user
+                       if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+                       else { 
+                               mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                               if (!flip) {
+                                       mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                               }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                               mothurOutEndLine();
+                       }
                }
                else{
                        vector<int> positions;
@@ -205,11 +225,35 @@ int AlignCommand::execute(){
                                }
                                lines.push_back(new linePair(startPos, numSeqsPerProcessor));
                        }
-                       createProcesses(alignFileName, reportFileName); 
+                       createProcesses(alignFileName, reportFileName, accnosFileName); 
                        
                        rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
                        rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
                        
+                       vector<string> nonBlankAccnosFiles;
+                       //delete blank accnos files generated with multiple processes
+                       for(int i=0;i<processors;i++){  
+                               if (!(isBlank(accnosFileName + toString(processIDS[i]) + ".temp"))) {
+                                       nonBlankAccnosFiles.push_back(accnosFileName + toString(processIDS[i]) + ".temp");
+                               }else { remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());  }
+                       }
+                       
+                       //append accnos files
+                       if (nonBlankAccnosFiles.size() != 0) { 
+                               rename(nonBlankAccnosFiles[0].c_str(), accnosFileName.c_str());
+                               
+                               for (int h=1; h < nonBlankAccnosFiles.size(); h++) {
+                                       appendAlignFiles(nonBlankAccnosFiles[h], accnosFileName);
+                                       remove(nonBlankAccnosFiles[h].c_str());
+                               }
+                               mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                               if (!flip) {
+                                       mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                               }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                               mothurOutEndLine();
+                       }
+                       
+                       //append other files
                        for(int i=1;i<processors;i++){
                                appendAlignFiles((alignFileName + toString(processIDS[i]) + ".temp"), alignFileName);
                                remove((alignFileName + toString(processIDS[i]) + ".temp").c_str());
@@ -227,9 +271,22 @@ int AlignCommand::execute(){
                
                lines.push_back(new linePair(0, numFastaSeqs));
                
-               driver(lines[0], alignFileName, reportFileName);
+               driver(lines[0], alignFileName, reportFileName, accnosFileName);
+               
+               //delete accnos file if its blank else report to user
+               if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  }
+               else { 
+                       mothurOut("Some of you sequences generated alignments that eliminated too many bases, a list is provided in " + accnosFileName + ".");
+                       if (!flip) {
+                                mothurOut(" If you set the flip parameter to true mothur will try aligning the reverse compliment as well."); 
+                       }else{  mothurOut(" If the reverse compliment proved to be better it was reported.");  }
+                       mothurOutEndLine();
+               }
+               
 #endif
                
+               
+               
                mothurOut("It took " + toString(time(NULL) - start) + " secs to align " + toString(numFastaSeqs) + " sequences.");
                mothurOutEndLine();
                mothurOutEndLine();
@@ -244,10 +301,14 @@ int AlignCommand::execute(){
 
 //**********************************************************************************************************************
 
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
+int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName){
        try {
                ofstream alignmentFile;
                openOutputFile(alignFName, alignmentFile);
+               
+               ofstream accnosFile;
+               openOutputFile(accnosFName, accnosFile);
+               
                NastReport report(reportFName);
                
                ifstream inFASTA;
@@ -258,36 +319,83 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
                for(int i=0;i<line->numSeqs;i++){
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);
-
-                       if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
-                               alignment->resize(candidateSeq->getUnaligned().length()+1);
-                       }
-       
-                       report.setCandidate(candidateSeq);
-       
-                       Sequence temp = templateDB->findClosestSequence(candidateSeq);
-
-                       Sequence* templateSeq = &temp;
-
-                       report.setTemplate(templateSeq);
-                       report.setSearchParameters(search, templateDB->getSearchScore());
+                       int origNumBases = candidateSeq->getNumBases();
+                       string originalUnaligned = candidateSeq->getUnaligned();
+                       int numBasesNeeded = origNumBases * threshold;
+                       
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(candidateSeq->getUnaligned().length()+1);
+                               }
+                                                               
+                               Sequence temp = templateDB->findClosestSequence(candidateSeq);
+                               Sequence* templateSeq = &temp;
                                
-                       Nast nast(alignment, candidateSeq, templateSeq);
-
-                       report.setAlignmentParameters(align, alignment);
-       
-                       report.setNastParameters(nast);
-       
-                       alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+                               float searchScore = templateDB->getSearchScore();
+                                                               
+                               Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+                               Sequence* copy;
                                
-                       report.print();
+                               Nast* nast2;
+                               bool needToDeleteCopy = false;  //this is needed in case you have you enter the ifs below
+                                                                                               //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+                                                                                               //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+                                                                                               //so this bool tells you if you need to delete it
+                                                                                               
+                               //if there is a possibility that this sequence should be reversed
+                               if (candidateSeq->getNumBases() < numBasesNeeded) {
+                                       
+                                       string wasBetter = "";
+                                       //if the user wants you to try the reverse
+                                       if (flip) {
+                                               //get reverse compliment
+                                               copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+                                               copy->reverseComplement();
+                                               
+                                               //rerun alignment
+                                               Sequence temp2 = templateDB->findClosestSequence(copy);
+                                               Sequence* templateSeq2 = &temp2;
+                                               
+                                               searchScore = templateDB->getSearchScore();
+                                               
+                                               nast2 = new Nast(alignment, copy, templateSeq2);
                        
+                                               //check if any better
+                                               if (copy->getNumBases() > candidateSeq->getNumBases()) {
+                                                       candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
+                                                       templateSeq = templateSeq2; 
+                                                       delete nast;
+                                                       nast = nast2;
+                                                       needToDeleteCopy = true;
+                                               }else{  
+                                                       wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
+                                                       delete nast2;
+                                                       delete copy;    
+                                               }
+                                       }
+                                       
+                                       //create accnos file with names
+                                       accnosFile << candidateSeq->getName() << wasBetter << endl;
+                               }
+                               
+                               report.setCandidate(candidateSeq);
+                               report.setTemplate(templateSeq);
+                               report.setSearchParameters(search, searchScore);
+                               report.setAlignmentParameters(align, alignment);
+                               report.setNastParameters(*nast);
+       
+                               alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+                               
+                               report.print();
+                               delete nast;
+                               if (needToDeleteCopy) {   delete copy;   }
+                       }
                        delete candidateSeq;
-               
                }
                
                alignmentFile.close();
                inFASTA.close();
+               accnosFile.close();
                
                return 1;
        }
@@ -299,7 +407,7 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName){
 
 /**************************************************************************************************/
 
-void AlignCommand::createProcesses(string alignFileName, string reportFileName) {
+void AlignCommand::createProcesses(string alignFileName, string reportFileName, string accnosFName) {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
@@ -313,7 +421,7 @@ void AlignCommand::createProcesses(string alignFileName, string reportFileName)
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp");
+                               driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp");
                                exit(0);
                        }else { mothurOut("unable to spawn the necessary processes."); mothurOutEndLine(); exit(0); }
                }
@@ -354,8 +462,7 @@ void AlignCommand::appendAlignFiles(string temp, string filename) {
                exit(1);
        }
 }
-
-/**************************************************************************************************/
+//**********************************************************************************************************************
 
 void AlignCommand::appendReportFiles(string temp, string filename) {
        try{
index c3c244138a7b2457dd20ba5bedc152b88f0e0e2b..f7d59b0206c989558f0e11163d1f7abddcf9a784 100644 (file)
@@ -36,16 +36,16 @@ private:
        AlignmentDB* templateDB;
        Alignment* alignment;
        
-       int driver(linePair*, string, string);
-       void createProcesses(string, string);
+       int driver(linePair*, string, string, string);
+       void createProcesses(string, string, string);
        void appendAlignFiles(string, string); 
        void appendReportFiles(string, string);
        
        string candidateFileName, templateFileName, distanceFileName, search, align;
-       float match, misMatch, gapOpen, gapExtend;
+       float match, misMatch, gapOpen, gapExtend, threshold;
        int processors, kmerSize;
        
-       bool abort;
+       bool abort, flip;
 };
 
 #endif
index 6b05989d40a0184540a9397fb75efd79e49a14cc..e4509521e7383d305151170e2fcc96da1c2f3e88 100644 (file)
@@ -25,14 +25,13 @@ AlignmentDB::AlignmentDB(string fastaFileName, string method, int kmerSize, floa
                mothurOut("Reading in the " + fastaFileName + " template sequences...\t");      cout.flush();
                
                while (!fastaFile.eof()) {
-                       Sequence temp(fastaFile);
+                       Sequence temp(fastaFile);  gobble(fastaFile);
                        
-                       templateSequences.push_back(temp);
-                       
-                       //save longest base
-                       if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
-                       
-                       gobble(fastaFile);
+                       if (temp.getName() != "") {
+                               templateSequences.push_back(temp);
+                               //save longest base
+                               if (temp.getUnaligned().length() > longest)  { longest = temp.getUnaligned().length(); }
+                       }
                }
                
                numSeqs = templateSequences.size();
index 8851b755a09a0b030418e7881d09caedb4753584..db153dde6e90b285f7592628789346f3bb696fb6 100644 (file)
@@ -94,9 +94,9 @@ vector<Sequence*> Chimera::readSeqs(string file) {
                //read in seqs and store in vector
                while(!in.eof()){
                        
-                       Sequence* current = new Sequence(in);
-                       container.push_back(current);
-                       gobble(in);
+                       Sequence* current = new Sequence(in);  gobble(in);
+                       
+                       if (current->getName() != "") {  container.push_back(current);  }
                }
                
                in.close();
index 51fc0e7b3a93265eee63092adfe5ea75cdb87f6b..bf0ffa66009ba7cdc02f46ff4d2c784e58a45a1f 100644 (file)
@@ -156,7 +156,7 @@ void ChimeraSeqsCommand::help(){
                mothurOut("The ksize parameter allows you to input kmersize. \n");
                mothurOut("The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence.\n");
                mothurOut("The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n");
-               mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
+               //mothurOut("The iters parameter allows you to specify the number of bootstrap iters to do with the chimeraslayer method.\n");
                mothurOut("NOT ALL PARAMETERS ARE USED BY ALL METHODS. Please look below for method specifics.\n\n");
                mothurOut("Details for each method: \n"); 
                mothurOut("\tpintail: \n"); 
@@ -168,8 +168,8 @@ void ChimeraSeqsCommand::help(){
                mothurOut("\t\tparameters: fasta=required, template=required, filter=F, mask=no mask, processors=1, window=10% of length, numwanted=20\n"); 
                mothurOut("\tchimeracheck: \n"); 
                mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, ksize=7, svg=F, name=none\n\n"); 
-               mothurOut("\tchimeraslayer: \n"); 
-               mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
+               //mothurOut("\tchimeraslayer: \n"); 
+               //mothurOut("\t\tparameters: fasta=required, template=required, processors=1, increment=10, mask=no mask, numwanted=10, match=5, mismatch=-4, divergence=1.0, minsim=90, parents=5, iters=1000, window=100. \n\n"); 
                mothurOut("The chimera.seqs command should be in the following format: \n");
                mothurOut("chimera.seqs(fasta=yourFastaFile, filter=yourFilter, correction=yourCorrection, processors=yourProcessors, method=bellerophon) \n");
                mothurOut("Example: chimera.seqs(fasta=AD.align, filter=True, correction=true, method=bellerophon, window=200) \n");
index d2f8fc748f3064bd03a525ce6d9e6bb7895d55fe..2b76e5083d1e08d2d9dc29382688081cf462b953 100644 (file)
@@ -349,14 +349,15 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
                for(int i=0;i<line->numSeqs;i++){
                        
                        Sequence* candidateSeq = new Sequence(inFASTA);
-
-                       taxonomy = classify->getTaxonomy(candidateSeq);
                        
-                       if (taxonomy != "bad seq") {
-                               outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
-                               outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
-                       }
-                                                       
+                       if (candidateSeq->getName() != "") {
+                               taxonomy = classify->getTaxonomy(candidateSeq);
+                               
+                               if (taxonomy != "bad seq") {
+                                       outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+                                       outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+                               }
+                       }                               
                        delete candidateSeq;
                        
                        if((i+1) % 100 == 0){
index 635cff032a645b7e69c168c7b461eaca71bb364d..a3858639a1f78d633e9c1537054978bac4e4d499 100644 (file)
@@ -181,7 +181,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                else if(commandName == "merge.files")                   {       command = new MergeFileCommand(optionString);                   }
                else if(commandName == "system")                                {       command = new SystemCommand(optionString);                              }
                else if(commandName == "align.check")                   {       command = new AlignCheckCommand(optionString);                  }
-               else if(commandName == "get.sharedseqs")                        {       command = new GetSharedOTUCommand(optionString);                }
+               else if(commandName == "get.sharedseqs")                {       command = new GetSharedOTUCommand(optionString);                }
                else if(commandName == "get.otulist")                   {       command = new GetListCountCommand(optionString);                }
                else if(commandName == "hcluster")                              {       command = new HClusterCommand(optionString);                    }
                else if(commandName == "classify.seqs")                 {       command = new ClassifySeqsCommand(optionString);                }
@@ -195,7 +195,22 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
                exit(1);
        }
 }
+/***********************************************************/
+//This function is used to interrupt a command
+Command* CommandFactory::getCommand(){
+       try {
+               delete command;   //delete the old command
 
+               string s = "";
+           command = new NoCommand(s);
+       
+               return command;
+       }
+       catch(exception& e) {
+               errorOut(e, "CommandFactory", "getCommand");
+               exit(1);
+       }
+}
 /***********************************************************************/
 bool CommandFactory::isValidCommand(string command) {
        try {   
index 0cede38b05904dd9252f8987c8782f32f04cc297..55f70eaabf1440488c46679472dd86c36693b088 100644 (file)
@@ -19,6 +19,7 @@ public:
        CommandFactory();
        ~CommandFactory();
        Command* getCommand(string, string);
+       Command* getCommand();
        bool isValidCommand(string);
        void printCommands(ostream&);
 
index 0a0f5a0ce7f0f4f0a120509e2c3d5c0e51471ecb..371bda8fb83f2b8ceb4793c49f067ed5987f2094 100644 (file)
@@ -20,14 +20,11 @@ InteractEngine::InteractEngine(string path){
 
        globaldata = GlobalData::getInstance();
        globaldata->argv = path;
-       
-       
 }
 
 /***********************************************************************/
 
-InteractEngine::~InteractEngine(){
-       }
+InteractEngine::~InteractEngine(){}
 
 /***********************************************************************/
 //This function allows the user to input commands one line at a time until they quit.
@@ -44,9 +41,47 @@ bool InteractEngine::getInput(){
                        
                        mothurOutEndLine();
                        mothurOut("mothur > ");
+                       
+                       //get input char by char so you can check for use of arrow keys
+                       //if (_kbhit()){
+                       //      _getch(); // edit : if you want to check the arrow-keys you must call getch twice because special-keys have two values
+                       //      return _getch();
+                       //}
+                       //return 0; // if no key is pressed
+                       //setbuf(stdin, NULL); //no buffering
+/*if(ch==0)
+{
+ch=getch();
+if(ch==72) cout<<"up";
+else if(ch==75) cout<<"left";
+else if(ch==77) cout<<"right";
+else if(ch==80) cout<<"down";
+cout<<endl;
+}
+else break;
+}
+delay(2000);
+return 0;
+}*/
+                       
+                       //int letter = 0;
+                       //while ((letter != 10) && (letter != 13)) {
+                       //      letter = getch();
+                               
+                       //      cout << "char code = " << letter << endl;
+                               
+                       //      input += char(letter);
+                       //}
+               //      input = input.substr(0, input.length()-1); //cut off newline char
+               
+       //cout << input << endl;                
+                       
                        getline(cin, input);
                        if (cin.eof()) { input = "quit()"; }
                        
+                       //save command
+                       //previousInputs.push_back(input);
+                       
                        mothurOutJustToLog(input);
                        mothurOutEndLine();
                        
@@ -55,13 +90,13 @@ bool InteractEngine::getInput(){
                        
                        CommandOptionParser parser(input);
                        commandName = parser.getCommandString();
+       //cout << " command = " << commandName << endl;
                        options = parser.getOptionString();
                        
                        if (commandName != "") {
                        
                                //executes valid command
-                               CommandFactory cFactory;
-                               Command* command = cFactory.getCommand(commandName, options);
+                               Command* command = cFactory->getCommand(commandName, options);
                                quitCommandCalled = command->execute();
                                
                        }else {
@@ -76,7 +111,17 @@ bool InteractEngine::getInput(){
                exit(1);
        }
 }
-
+/***********************************************************************/
+void Engine::terminateCommand(int dummy)  {
+       try {
+               mothurOut("Stopping command."); mothurOutEndLine();
+               cFactory->getCommand();  //terminates command
+       }
+       catch(exception& e) {
+               errorOut(e, "InteractEngine", "terminateCommand");
+               exit(1);
+       }
+}
 /***********************************************************************/
 //This function opens the batchfile to be used by BatchEngine::getInput.
 BatchEngine::BatchEngine(string path, string batchFileName){
@@ -95,8 +140,7 @@ BatchEngine::BatchEngine(string path, string batchFileName){
 
 /***********************************************************************/
 
-BatchEngine::~BatchEngine(){
-       }
+BatchEngine::~BatchEngine(){   }
 
 /***********************************************************************/
 //This Function allows the user to run a batchfile containing several commands on Dotur
@@ -138,8 +182,7 @@ bool BatchEngine::getInput(){
                                if (commandName != "") {
 
                                        //executes valid command
-                                       CommandFactory cFactory;
-                                       Command* command = cFactory.getCommand(commandName, options);
+                                       Command* command = cFactory->getCommand(commandName, options);
                                        quitCommandCalled = command->execute();
                                }else {         
                                        mothurOut("Invalid."); 
@@ -172,8 +215,6 @@ ScriptEngine::ScriptEngine(string path, string commandString){
 
                globaldata->argv = path;
                
-               
-       
        }
        catch(exception& e) {
                errorOut(e, "ScriptEngine", "ScriptEngine");
@@ -183,8 +224,7 @@ ScriptEngine::ScriptEngine(string path, string commandString){
 
 /***********************************************************************/
 
-ScriptEngine::~ScriptEngine(){
-       }
+ScriptEngine::~ScriptEngine(){         }
 
 /***********************************************************************/
 //This Function allows the user to run a batchfile containing several commands on mothur
@@ -221,8 +261,7 @@ bool ScriptEngine::getInput(){
                        if (commandName != "") {
 
                                //executes valid command
-                               CommandFactory cFactory;
-                               Command* command = cFactory.getCommand(commandName, options);
+                               Command* command = cFactory->getCommand(commandName, options);
                                quitCommandCalled = command->execute();
                        }else {         
                                mothurOut("Invalid."); 
index 603ff0a1c65fa528439b7a8f456991ae600c26ef..d4ec3b65a5cdd428c44967ed66ea454d7bc5b2f8 100644 (file)
@@ -22,13 +22,16 @@ class GlobalData;
 
 class Engine {
 public:
-       virtual ~Engine(){};
+       Engine() {  cFactory = new CommandFactory();    }
+       virtual ~Engine(){  delete cFactory;  }
        virtual bool getInput() = 0;
 //     string getCommand()                     {       return command;         }
        vector<string> getOptions() {   return options;         }
+       virtual void terminateCommand(int);
 protected:
 //     string command;
        vector<string> options;
+       CommandFactory* cFactory;
 };
 
 
@@ -54,6 +57,8 @@ public:
        virtual bool getInput();
 private:
        GlobalData* globaldata;
+       vector<string> previousInputs; //this is used to make the arrow keys work
+       
 };
 
 
index 57754667da593265618aa0d9e5c046e18b2b7203..f5ac122112a633de7768b251568edda33623a81c 100644 (file)
@@ -24,20 +24,21 @@ void FastaMap::readFastaFile(string inFileName) {
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
-                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
-                       else                                            {       sequence = currSeq.getUnaligned();      }
-                       
-                       seqmap[name] = sequence;  
-                       map<string,group>::iterator it = data.find(sequence);
-                       if (it == data.end()) {         //it's unique.
-                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-//                             data[sequence].groupnumber = 1;
-                               data[sequence].names = name;
-                       }else { // its a duplicate.
-                               data[sequence].names += "," + name;
-//                             data[sequence].groupnumber++;
-                       }       
-                       
+                       if (name != "") {
+                               if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
+                               else                                            {       sequence = currSeq.getUnaligned();      }
+                               
+                               seqmap[name] = sequence;  
+                               map<string,group>::iterator it = data.find(sequence);
+                               if (it == data.end()) {         //it's unique.
+                                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                                       //                              data[sequence].groupnumber = 1;
+                                       data[sequence].names = name;
+                               }else { // its a duplicate.
+                                       data[sequence].names += "," + name;
+                                       //                              data[sequence].groupnumber++;
+                               }       
+                       }
                        gobble(in);
                }
                in.close();             
@@ -71,20 +72,21 @@ void FastaMap::readFastaFile(string inFastaFile, string oldNameFileName){ //prin
                Sequence currSeq(inFASTA);
                name = currSeq.getName();
                
-               if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
-               else                                            {       sequence = currSeq.getUnaligned();      }
-               
-               seqmap[name] = sequence;  
-               map<string,group>::iterator it = data.find(sequence);
-               if (it == data.end()) {         //it's unique.
-                       data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
-//                     data[sequence].groupnumber = 1;
-                       data[sequence].names = oldNameMap[name];
-               }else { // its a duplicate.
-                       data[sequence].names += "," + oldNameMap[name];
-//                     data[sequence].groupnumber++;
-               }       
-               
+               if (name != "") {
+                       if(currSeq.getIsAligned())      {       sequence = currSeq.getAligned();        }
+                       else                                            {       sequence = currSeq.getUnaligned();      }
+                       
+                       seqmap[name] = sequence;  
+                       map<string,group>::iterator it = data.find(sequence);
+                       if (it == data.end()) {         //it's unique.
+                               data[sequence].groupname = name;  //group name will be the name of the first duplicate sequence found.
+                               //                      data[sequence].groupnumber = 1;
+                               data[sequence].names = oldNameMap[name];
+                       }else { // its a duplicate.
+                               data[sequence].names += "," + oldNameMap[name];
+                               //                      data[sequence].groupnumber++;
+                       }       
+               }
                gobble(inFASTA);
        }
        
index 68450fb15a7b140803aab2d54d8c5bc84e7ce688..a689704131d32ae7e8a244babc672695cb432384 100644 (file)
@@ -121,10 +121,12 @@ int FilterSeqsCommand::execute() {
                if(trump != '*' || isTrue(vertical) || soft != 0){
                        while(!inFASTA.eof()){  //read through and create the filter...
                                Sequence seq(inFASTA);
-                               if(trump != '*'){       F.doTrump(seq); }
-                               if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
-                               numSeqs++;
-                               cout.flush();
+                               if (seq.getName() != "") {
+                                       if(trump != '*'){       F.doTrump(seq); }
+                                       if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
+                                       numSeqs++;
+                                       cout.flush();
+                               }
                        }
                
                }
@@ -152,17 +154,19 @@ int FilterSeqsCommand::execute() {
                numSeqs = 0;
                while(!inFasta2.eof()){
                        Sequence seq(inFasta2);
-                       string align = seq.getAligned();
-                       string filterSeq = "";
-       
-                       for(int j=0;j<alignmentLength;j++){
-                               if(filter[j] == '1'){
-                                       filterSeq += align[j];
+                       if (seq.getName() != "") {
+                               string align = seq.getAligned();
+                               string filterSeq = "";
+                               
+                               for(int j=0;j<alignmentLength;j++){
+                                       if(filter[j] == '1'){
+                                               filterSeq += align[j];
+                                       }
                                }
+                               
+                               outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
+                               numSeqs++;
                        }
-
-                       outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
-                       numSeqs++;
                        gobble(inFasta2);
                }
                outFASTA.close();
index 04b254599f46ef00081a3f153c0e097fea665cf4..315417a30586d290b2a692e04b2e0afacb1e6fb3 100644 (file)
@@ -125,15 +125,16 @@ void GetSeqsCommand::readFasta(){
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
-                       //if this name is in the accnos file
-                       if (names.count(name) == 1) {
-                               wroteSomething = true;
-                               
-                               currSeq.printSequence(out);
-                               
-                               names.erase(name);
+                       if (name != "") {
+                               //if this name is in the accnos file
+                               if (names.count(name) == 1) {
+                                       wroteSomething = true;
+                                       
+                                       currSeq.printSequence(out);
+                                       
+                                       names.erase(name);
+                               }
                        }
-                       
                        gobble(in);
                }
                in.close();     
index 76e5725d850e17d52bff0c72abfd2d83669778ee..742377f14a12cb55f306cf334936a9ca1f1e12f3 100644 (file)
@@ -133,7 +133,7 @@ int GetSharedOTUCommand::execute(){
                        
                        while(!inFasta.eof()) {
                                Sequence seq(inFasta);
-                               seqs.push_back(seq);
+                               if (seq.getName() != "") {  seqs.push_back(seq);   }
                        }
                        inFasta.close();
                }
index efc88e98232497383d8d9234ac2d53641d8d3d6e..3087b48fd9669d13b5460f032f911b8f32049dce 100644 (file)
@@ -125,7 +125,7 @@ void ListSeqsCommand::readFasta(){
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
-                       names.push_back(name);
+                       if (name != "") {  names.push_back(name);  }
                        
                        gobble(in);
                }
index 1c0c98f9f1caf4cec5de25f4c66ce7f9a3a3af8f..4e064b142d896cb1060d98c9574e42a4355a9efb 100644 (file)
@@ -77,7 +77,7 @@ int main(int argc, char *argv[]){
                                
                //srand(54321);
                srand( (unsigned)time( NULL ) );
-
+               
                Engine* mothur;
                bool bail = 0;
                string input;
@@ -92,9 +92,16 @@ int main(int argc, char *argv[]){
                        }
                }
                else{
-                       mothur = new InteractEngine(argv[0]);           
+                       mothur = new InteractEngine(argv[0]);   
                }
+               
+               //used to intercept the terminate signal, so instead of terminating mothur it will end a command
+               //void (*prev_fn)(int);
+               //prev_fn = signal(SIGTERM, mothur->terminateCommand(0));
+               
+               //if (prev_fn==SIG_IGN) signal (SIGTERM,SIG_IGN);
 
+               
                while(bail == 0)                {       bail = mothur->getInput();                      }
        
                delete mothur;
index ba88f71fabffced9509deed545cecde7228b2b63..5c1880d569eeb7f37089c1a2d418f8fadf7b4ab7 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -21,6 +21,7 @@
 #include <iomanip>
 #include <fstream>
 #include <sstream>
+#include <signal.h>
 
 //exception
 #include <stdexcept>
@@ -283,9 +284,6 @@ inline void errorOut(exception& e, string object, string function) {
        
 }
 
-
-
-
 /***********************************************************************/
 
 inline bool isTrue(string f){
@@ -425,7 +423,21 @@ inline string getExtension(string longName){
        
        return extension;
 }
-
+/***********************************************************************/
+inline bool isBlank(string fileName){
+       
+       ifstream fileHandle;
+       fileHandle.open(fileName.c_str());
+       if(!fileHandle) {
+               mothurOut("Error: Could not open " + fileName);  mothurOutEndLine();
+               return false;
+       }else {
+               //check for blank file
+               gobble(fileHandle);
+               if (fileHandle.eof()) { fileHandle.close(); return true;  }
+       }
+       return false;
+}
 /***********************************************************************/
 
 inline int openInputFile(string fileName, ifstream& fileHandle){
index 4848eb20be0b5cfdb43715b5ba1589d253ba4571..6a4ae3db2f679ee4ccadf2b0ff3270f5da4f15aa 100644 (file)
--- a/nast.cpp
+++ b/nast.cpp
@@ -32,7 +32,6 @@ Nast::Nast(Alignment* method, Sequence* cand, Sequence* temp) : alignment(method
                errorOut(e, "Nast", "Nast");
                exit(1);
        }
-
 }
 
 /**************************************************************************************************/
@@ -42,10 +41,10 @@ void Nast::pairwiseAlignSeqs(){     //      Here we call one of the pairwise alignment me
        try {
 
                alignment->align(candidateSeq->getUnaligned(), templateSeq->getUnaligned());
-               
+       
                string candAln = alignment->getSeqAAln();
                string tempAln = alignment->getSeqBAln();
-               
+       
                if(candAln == ""){
 
                        candidateSeq->setPairwise("");
@@ -78,7 +77,6 @@ void Nast::pairwiseAlignSeqs(){       //      Here we call one of the pairwise alignment me
 
                candidateSeq->setPairwise(candAln);                                     //      set the pairwise sequences in the Sequence objects for
                templateSeq->setPairwise(tempAln);                                      //      the candidate and template sequences
-
        }
        catch(exception& e) {
                errorOut(e, "Nast", "pairwiseAlignSeqs");
index 4cf1b85b9524cd69f8c824143087bef79e0efb67..bc2467f13c3d65fe41309ffb6de13ffdbaafd7c4 100644 (file)
--- a/nast.hpp
+++ b/nast.hpp
@@ -29,6 +29,7 @@ class Nast {
        
 public:
        Nast(Alignment*, Sequence*, Sequence*);
+       ~Nast(){};
        float getSimilarityScore();
        int getMaxInsertLength();
        
index e30b51d243ac776fc36720321e53e6b72702237e..40b49f88bfe4d5cda9f123690253f87cda9ce61f 100644 (file)
@@ -60,7 +60,7 @@ void Overlap::setOverlap(vector<vector<AlignmentCell> >& alignment, const int nA
        
        int rowIndex = maxRow(alignment, band);         //      get the index for the row with the highest right hand side score
        int colIndex = maxColumn(alignment, band);      //      get the index for the column with the highest bottom row score
-       
+               
        int row = lB-1;
        int column = lA-1;
        
index 1b9edb0e98e20f7752fb9b8aab3584c1c05bccc0..28f8cf7a6d890b0c688c388b3a2c0e8c8438ba27 100644 (file)
@@ -125,13 +125,14 @@ void RemoveSeqsCommand::readFasta(){
                        Sequence currSeq(in);
                        name = currSeq.getName();
                        
-                       //if this name is in the accnos file
-                       if (names.count(name) == 0) {
-                               wroteSomething = true;
-                               
-                               currSeq.printSequence(out);
-                       }else {         names.erase(name);              }
-                       
+                       if (name != "") {
+                               //if this name is in the accnos file
+                               if (names.count(name) == 0) {
+                                       wroteSomething = true;
+                                       
+                                       currSeq.printSequence(out);
+                               }else {         names.erase(name);              }
+                       }
                        gobble(in);
                }
                in.close();     
index 7a87a99a9720f3f5c8bc04e77bac39d5eabf5ea0..37e95685430184760cd0919c18a1cfeea2ec4c42 100644 (file)
@@ -82,9 +82,11 @@ int ReverseSeqsCommand::execute(){
                openOutputFile(reverseFile, outFASTA);
                
                while(!inFASTA.eof()){
-                       Sequence currSeq(inFASTA);
-                       currSeq.reverseComplement();
-                       currSeq.printSequence(outFASTA);
+                       Sequence currSeq(inFASTA);  gobble(inFASTA);
+                       if (currSeq.getName() != "") {
+                               currSeq.reverseComplement();
+                               currSeq.printSequence(outFASTA);
+                       }
                }
                inFASTA.close();
                outFASTA.close();
index d5e917c204f1c2757cae52eb5dc8227929851ed1..9225e992bab6e17d0c3228cc36ced9a2505933e7 100644 (file)
@@ -131,20 +131,22 @@ int ScreenSeqsCommand::execute(){
                
                while(!inFASTA.eof()){
                        Sequence currSeq(inFASTA);
-                       bool goodSeq = 1;               //      innocent until proven guilty
-                       if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
-                       if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
-                       if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
-                       if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
-                       
-                       if(goodSeq == 1){
-                               currSeq.printSequence(goodSeqOut);      
-                       }
-                       else{
-                               currSeq.printSequence(badSeqOut);       
-                               badSeqNames.insert(currSeq.getName());
+                       if (currSeq.getName() != "") {
+                               bool goodSeq = 1;               //      innocent until proven guilty
+                               if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
+                               if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
+                               if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
+                               if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
+                               
+                               if(goodSeq == 1){
+                                       currSeq.printSequence(goodSeqOut);      
+                               }
+                               else{
+                                       currSeq.printSequence(badSeqOut);       
+                                       badSeqNames.insert(currSeq.getName());
+                               }
                        }
                        gobble(inFASTA);
                }       
index 7156082f8b1b43228a62c6cf93abec9f2d900109..c2c104e71b1f535cd2a818afe268e5d7e1ed4c24 100644 (file)
@@ -91,11 +91,13 @@ int AlignCheckCommand::execute(){
                
                while(!in.eof()){
                        
-                       Sequence seq(in);
-                       statData data = getStats(seq.getAligned());
-                       
-                       out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
-                       out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+                       Sequence seq(in);  gobble(in);
+                       if (seq.getName() != "") {
+                               statData data = getStats(seq.getAligned());
+                               
+                               out << seq.getName() << '\t' << data.pound << '\t' << data.dash << '\t' << data.plus << '\t' << data.equal << '\t';
+                               out << data.loop << '\t' << data.tilde << '\t' << data.total << endl;
+                       }
                }
 
                in.close();
index 4eafaf8ed15e7fde8c60f9c3d80a376f1d796f3d..9ab27f9cbd439fe55ed1856c6940f0e42e6ee7e0 100644 (file)
@@ -91,18 +91,20 @@ int SeqSummaryCommand::execute(){
 
                while(!inFASTA.eof()){
                        Sequence current(inFASTA);
-                       startPosition.push_back(current.getStartPos());
-                       endPosition.push_back(current.getEndPos());
-                       seqLength.push_back(current.getNumBases());
-                       ambigBases.push_back(current.getAmbigBases());
-                       longHomoPolymer.push_back(current.getLongHomoPolymer());
-
-                       outSummary << current.getName() << '\t';
-                       outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
-                       outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
-                       outSummary << current.getLongHomoPolymer() << endl;
-                       
-                       numSeqs++;
+                       if (current.getName() != "") {
+                               startPosition.push_back(current.getStartPos());
+                               endPosition.push_back(current.getEndPos());
+                               seqLength.push_back(current.getNumBases());
+                               ambigBases.push_back(current.getAmbigBases());
+                               longHomoPolymer.push_back(current.getLongHomoPolymer());
+                               
+                               outSummary << current.getName() << '\t';
+                               outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
+                               outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
+                               outSummary << current.getLongHomoPolymer() << endl;
+                               
+                               numSeqs++;
+                       }
                        gobble(inFASTA);
                }
                inFASTA.close();
index b2c2b53d0b4078e6e296312662dcdfe0ffe5fbec..752e081e367fc6d572b5622e0b4eb6003a28f9bb 100644 (file)
@@ -28,35 +28,85 @@ Sequence::Sequence(string newName, string sequence) {
        
 }
 //********************************************************************************************************************
-
+//this function will jump over commented out sequences, but if the last sequence in a file is commented out it makes a blank seq
 Sequence::Sequence(ifstream& fastaFile){
 
        initialize();
        fastaFile >> name;
        name = name.substr(1);
-       
-       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-
-       char letter;
        string sequence;
        
-       while(fastaFile){
-               letter= fastaFile.get();
-               if(letter == '>'){
-                       fastaFile.putback(letter);
+       //read comments
+       while ((name[0] == '#') && fastaFile) { 
+           while (!fastaFile.eof())    {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+               sequence = getCommentString(fastaFile);
+               
+               if (fastaFile) {  
+                       fastaFile >> name;  
+                       name = name.substr(1);  
+               }else { 
+                       name = "";
                        break;
                }
-               else if(isprint(letter)){
-                       letter = toupper(letter);
-                       if(letter == 'U'){letter = 'T';}
-                       sequence += letter;
-               }
        }
-
+       
+       //read real sequence
+       while (!fastaFile.eof())        {       char c = fastaFile.get(); if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+               
+       sequence = getSequenceString(fastaFile);                
+       
        setAligned(sequence);   
        //setUnaligned removes any gap characters for us                                                
        setUnaligned(sequence);                                                         
 }
+//********************************************************************************************************************
+string Sequence::getSequenceString(ifstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";   
+               
+               while(fastaFile){
+                       letter= fastaFile.get();
+                       if(letter == '>'){
+                               fastaFile.putback(letter);
+                               break;
+                       }
+                       else if(isprint(letter)){
+                               letter = toupper(letter);
+                               if(letter == 'U'){letter = 'T';}
+                               sequence += letter;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               errorOut(e, "Sequence", "getSequenceString");
+               exit(1);
+       }
+}
+//********************************************************************************************************************
+//comment can contain '>' so we need to account for that
+string Sequence::getCommentString(ifstream& fastaFile) {
+       try {
+               char letter;
+               string sequence = "";
+               
+               while(fastaFile){
+                       letter=fastaFile.get();
+                       if((letter == '\r') || (letter == '\n')){  
+                               gobble(fastaFile);  //in case its a \r\n situation
+                               break;
+                       }
+               }
+               
+               return sequence;
+       }
+       catch(exception& e) {
+               errorOut(e, "Sequence", "getCommentString");
+               exit(1);
+       }
+}
 
 //********************************************************************************************************************
 
@@ -109,6 +159,7 @@ void Sequence::setAligned(string sequence){
        //if the alignment starts or ends with a gap, replace it with a period to indicate missing data
        aligned = sequence;
        alignmentLength = aligned.length();
+       setUnaligned(sequence); 
 
        if(aligned[0] == '-'){
                for(int i=0;i<alignmentLength;i++){
index ac9384bf156bf471cc35b3fe8d57f6ecf777e857..f6d7c869853a17808d8d894466f3d473b18c13d8 100644 (file)
@@ -48,6 +48,8 @@ public:
        
 private:
        void initialize();
+       string getSequenceString(ifstream&);
+       string getCommentString(ifstream&);
        string name;
        string unaligned;
        string aligned;
index 32fc14644698ddedb4248aba10ea3ed3415bf7be..b3da429fc57c9da7081640876ee9fbedda808e25 100644 (file)
@@ -35,7 +35,8 @@ SequenceDB::SequenceDB(ifstream& filehandle) {
                while (!filehandle.eof()) {
                        //input sequence info into sequencedb
                        Sequence newSequence(filehandle);
-                       data.push_back(newSequence);
+                       
+                       if (newSequence.getName() != "") {   data.push_back(newSequence);  }
                        
                        //takes care of white space
                        gobble(filehandle);
index 5ca70a99cb4b600780b425ff7f29b909794cc362..ca7a927134f1f904401ab017d456ff1f6859a569 100644 (file)
@@ -173,61 +173,63 @@ int TrimSeqsCommand::execute(){
                while(!inFASTA.eof()){
                        Sequence currSeq(inFASTA);
                        string origSeq = currSeq.getUnaligned();
-                       int group;
-                       string trashCode = "";
-                       
-                       if(qFileName != ""){
-                               if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
-                               else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
-                               if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
-                                       success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+                       if (origSeq != "") {
+                               int group;
+                               string trashCode = "";
+                               
+                               if(qFileName != ""){
+                                       if(qThreshold != 0)             {       success = stripQualThreshold(currSeq, qFile);   }
+                                       else if(qAverage != 0)  {       success = cullQualAverage(currSeq, qFile);              }
+                                       if ((!qtrim) && (origSeq.length() != currSeq.getUnaligned().length())) { 
+                                               success = 0; //if you don't want to trim and the sequence does not meet quality requirements, move to scrap
+                                       }
+                                       if(!success)                    {       trashCode += 'q';                                                               }
                                }
-                               if(!success)                    {       trashCode += 'q';                                                               }
-                       }
-                       if(barcodes.size() != 0){
-       
-                               success = stripBarcode(currSeq, group);
-                               if(!success){   trashCode += 'b';       }
-                       }
-                       if(numFPrimers != 0){
-                               success = stripForward(currSeq);
-                               if(!success){   trashCode += 'f';       }
-                       }
-                       if(numRPrimers != 0){
-                               success = stripReverse(currSeq);
-                               if(!success){   trashCode += 'r';       }
-                       }
-                       if(minLength > 0 || maxLength > 0){
-                               success = cullLength(currSeq);
-                       if ((currSeq.getUnaligned().length() > 300) && (success)) {  cout << "too long " << currSeq.getUnaligned().length() << endl;  }
-                               if(!success){   trashCode += 'l'; }
-                       }
-                       if(maxHomoP > 0){
-                               success = cullHomoP(currSeq);
-                               if(!success){   trashCode += 'h';       }
-                       }
-                       if(maxAmbig != -1){
-                               success = cullAmbigs(currSeq);
-                               if(!success){   trashCode += 'n';       }
-                       }
-                       
-                       if(flip){       currSeq.reverseComplement();    }               // should go last                       
-                       
-                       if(trashCode.length() == 0){
-                               currSeq.setAligned(currSeq.getUnaligned());  //this is because of a modification we made to the sequence class to fix a bug.  all seqs have an aligned version, which is the version that gets printed.
-                               currSeq.printSequence(outFASTA);
                                if(barcodes.size() != 0){
-                                       outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
                                        
-                                       if(allFiles){
-                                               currSeq.printSequence(*fastaFileNames[group]);                                  
+                                       success = stripBarcode(currSeq, group);
+                                       if(!success){   trashCode += 'b';       }
+                               }
+                               if(numFPrimers != 0){
+                                       success = stripForward(currSeq);
+                                       if(!success){   trashCode += 'f';       }
+                               }
+                               if(numRPrimers != 0){
+                                       success = stripReverse(currSeq);
+                                       if(!success){   trashCode += 'r';       }
+                               }
+                               if(minLength > 0 || maxLength > 0){
+                                       success = cullLength(currSeq);
+                                       if ((currSeq.getUnaligned().length() > 300) && (success)) {  cout << "too long " << currSeq.getUnaligned().length() << endl;  }
+                                       if(!success){   trashCode += 'l'; }
+                               }
+                               if(maxHomoP > 0){
+                                       success = cullHomoP(currSeq);
+                                       if(!success){   trashCode += 'h';       }
+                               }
+                               if(maxAmbig != -1){
+                                       success = cullAmbigs(currSeq);
+                                       if(!success){   trashCode += 'n';       }
+                               }
+                               
+                               if(flip){       currSeq.reverseComplement();    }               // should go last                       
+                               
+                               if(trashCode.length() == 0){
+                                       currSeq.setAligned(currSeq.getUnaligned());  //this is because of a modification we made to the sequence class to fix a bug.  all seqs have an aligned version, which is the version that gets printed.
+                                       currSeq.printSequence(outFASTA);
+                                       if(barcodes.size() != 0){
+                                               outGroups << currSeq.getName() << '\t' << groupVector[group] << endl;
+                                               
+                                               if(allFiles){
+                                                       currSeq.printSequence(*fastaFileNames[group]);                                  
+                                               }
                                        }
                                }
-                       }
-                       else{
-                               currSeq.setName(currSeq.getName() + '|' + trashCode);
-                               currSeq.setUnaligned(origSeq);
-                               currSeq.printSequence(scrapFASTA);
+                               else{
+                                       currSeq.setName(currSeq.getName() + '|' + trashCode);
+                                       currSeq.setUnaligned(origSeq);
+                                       currSeq.printSequence(scrapFASTA);
+                               }
                        }
                        gobble(inFASTA);
                }