git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.cpp
fixes while testing
[mothur.git] / trimseqscommand.cpp
index 320bc41f4816eccb8f4b0fb19cb92864e91055d2..f684b66a8e72d18f98172579197fac7cc5a2f543 100644 (file)
 #include "trimseqscommand.h"
 #include "needlemanoverlap.hpp"
 
+//**********************************************************************************************************************
+// Returns the list of parameter names accepted by this command, in the order listed below.
+vector<string> TrimSeqsCommand::getValidParameters(){
+       try {
+               // table of C strings; each entry becomes a string when the vector is range-constructed
+               const char* const names[] = {"fasta", "flip", "oligos", "maxambig", "maxhomop", "minlength", "maxlength", "qfile",
+                                                                       "qthreshold", "qwindowaverage", "qstepsize", "qwindowsize", "qaverage", "rollaverage", "allfiles", "qtrim","tdiffs", "pdiffs", "bdiffs", "processors", "outputdir","inputdir"};
+               const char* const* const last = names + (sizeof(names)/sizeof(names[0]));
+               return vector<string>(names, last);
+       }
+       catch(exception& err) {
+               // hand the exception to m->errorOut, then exit with status 1
+               m->errorOut(err, "TrimSeqsCommand", "getValidParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Default constructor: sets the abort flag and registers an empty file list
+// for each of the "fasta", "qual" and "group" output types.
+TrimSeqsCommand::TrimSeqsCommand(){
+       try {
+               abort = true;
+
+               //initialize outputTypes: every known type starts with no file names
+               const vector<string> noFiles;
+               const char* const kinds[] = {"fasta", "qual", "group"};
+               for (int k = 0; k < 3; k++) {
+                       outputTypes[kinds[k]] = noFiles;
+               }
+       }
+       catch(exception& err) {
+               // hand the exception to m->errorOut, then exit with status 1
+               m->errorOut(err, "TrimSeqsCommand", "TrimSeqsCommand");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Returns the parameter names that must be supplied; the only entry is "fasta".
+vector<string> TrimSeqsCommand::getRequiredParameters(){
+       try {
+               vector<string> required;
+               required.push_back("fasta");
+               return required;
+       }
+       catch(exception& err) {
+               // hand the exception to m->errorOut, then exit with status 1
+               m->errorOut(err, "TrimSeqsCommand", "getRequiredParameters");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+// Returns the list of required files, which is always empty for this command.
+vector<string> TrimSeqsCommand::getRequiredFiles(){
+       try {
+               return vector<string>();
+       }
+       catch(exception& err) {
+               // hand the exception to m->errorOut, then exit with status 1
+               m->errorOut(err, "TrimSeqsCommand", "getRequiredFiles");
+               exit(1);
+       }
+}
 //***************************************************************************************************************
 
 TrimSeqsCommand::TrimSeqsCommand(string option)  {
@@ -39,6 +90,12 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                                if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
                        }
                        
+                       //initialize outputTypes
+                       vector<string> tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
+                       outputTypes["qual"] = tempOutNames;
+                       outputTypes["group"] = tempOutNames;
+                       
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        if (inputDir == "not found"){   inputDir = "";          }
@@ -72,7 +129,7 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
                        
                        //check for required parameters
                        fastaFile = validParameter.validFile(parameters, "fasta", true);
-                       if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the screen.seqs command."); m->mothurOutEndLine(); abort = true; }
+                       if (fastaFile == "not found") { m->mothurOut("fasta is a required parameter for the trim.seqs command."); m->mothurOutEndLine(); abort = true; }
                        else if (fastaFile == "not open") { abort = true; }     
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
@@ -220,18 +277,18 @@ int TrimSeqsCommand::execute(){
                numRPrimers = 0;
                
                string trimSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
-               outputNames.push_back(trimSeqFile);
+               outputNames.push_back(trimSeqFile); outputTypes["fasta"].push_back(trimSeqFile);
                string scrapSeqFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.fasta";
-               outputNames.push_back(scrapSeqFile);
+               outputNames.push_back(scrapSeqFile); outputTypes["fasta"].push_back(scrapSeqFile);
                string trimQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
                string scrapQualFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "scrap.qual";
-               if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  }
+               if (qFileName != "") {  outputNames.push_back(trimQualFile); outputNames.push_back(scrapQualFile);  outputTypes["qual"].push_back(trimQualFile); outputTypes["qual"].push_back(scrapQualFile); }
                string groupFile = outputDir + m->getRootName(m->getSimpleName(fastaFile)) + "groups";
                
                vector<string> fastaFileNames;
                vector<string> qualFileNames;
                if(oligoFile != ""){
-                       outputNames.push_back(groupFile);
+                       outputNames.push_back(groupFile); outputTypes["group"].push_back(groupFile);
                        getOligos(fastaFileNames, qualFileNames);
                }
 
@@ -339,7 +396,7 @@ int TrimSeqsCommand::execute(){
                                ofstream outGroups;
                                string outGroupFilename = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "groups";
                                m->openOutputFile(outGroupFilename, outGroups);
-                               outputNames.push_back(outGroupFilename);
+                               outputNames.push_back(outGroupFilename); outputTypes["group"].push_back(outGroupFilename);  
                                
                                string thisGroup = "";
                                if (i > comboStarts) {
@@ -425,22 +482,40 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                }
                
                ofstream outGroups;
-               vector<ofstream*> fastaFileNames;
-               vector<ofstream*> qualFileNames;
+               //vector<ofstream*> fastaFileNames;
+               //vector<ofstream*> qualFileNames;
                
                if (oligoFile != "") {          
                        m->openOutputFile(groupFile, outGroups);   
                        for (int i = 0; i < fastaNames.size(); i++) {
 
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                               fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
+                               //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                               //clear old file if it exists
+                               ofstream temp;
+                               m->openOutputFile(fastaNames[i], temp);
+                               temp.close();
                                if(qFileName != ""){
-                                       qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                                       qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
+                                       //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                                       //clear old file if it exists
+                                       ofstream temp2;
+                                       m->openOutputFile(qualNames[i], temp2);
+                                       temp2.close();
                                }
                        #else
-                               fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
+                               //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); 
+                               fastaNames[i] = (fastaNames[i] + toString(i) + ".temp");
+                               ofstream temp;
+                               m->openOutputFile(fastaNames[i], temp);
+                               temp.close();                   
                                if(qFileName != ""){
-                                       qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
+                                       //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));      
+                                       qualNames[i] = (qualNames[i] + toString(i) + ".temp");
+                                       ofstream temp2;
+                                       m->openOutputFile(qualNames[i], temp2);
+                                       temp2.close();          
                                }
                        #endif
                        }
@@ -462,11 +537,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                inFASTA.close(); outFASTA.close(); scrapFASTA.close();
                                if (oligoFile != "") {   outGroups.close();   }
                                
-                               for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
+                               //for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }   
 
                                if(qFileName != ""){
                                        qFile.close();
-                                       for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
+                                       //for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }      
                                }
                                for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
 
@@ -553,10 +628,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                }
                                                outGroups << currSeq.getName() << '\t' << thisGroup << endl;
                                                if(allFiles){
-                                                       currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+                                                       ofstream outTemp;
+                                                       m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
+                                                       //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+                                                       currSeq.printSequence(outTemp);
+                                                       outTemp.close();
                                                        
                                                        if(qFileName != ""){
-                                                               currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
+                                                               //currQual.printQScores(*qualFileNames[indexToFastaFile]);
+                                                               ofstream outTemp2;
+                                                               m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
+                                                               currQual.printQScores(outTemp2);
+                                                               outTemp2.close();                                                       
                                                        }
                                                }
                                        }
@@ -571,9 +654,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                count++;
                        }
                        
-                       unsigned long int pos = inFASTA.tellg();
-                       if ((pos == -1) || (pos >= line->end)) { break; }
-                       
+                       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                               unsigned long int pos = inFASTA.tellg();
+                               if ((pos == -1) || (pos >= line->end)) { break; }
+                       #else
+                               if (inFASTA.eof()) { break; }
+                       #endif
+                               
                        //report progress
                        if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                        
@@ -588,17 +675,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                if (oligoFile != "") {   outGroups.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
                
-               for(int i=0;i<fastaFileNames.size();i++){
-                       fastaFileNames[i]->close();
-                       delete fastaFileNames[i];
-               }               
+               //for(int i=0;i<fastaFileNames.size();i++){
+               //      fastaFileNames[i]->close();
+               //      delete fastaFileNames[i];
+               //}             
                
-               if(qFileName != ""){
-                       for(int i=0;i<qualFileNames.size();i++){
-                               qualFileNames[i]->close();
-                               delete qualFileNames[i];
-                       }               
-               }                       
+               //if(qFileName != ""){
+                       //for(int i=0;i<qualFileNames.size();i++){
+                               //qualFileNames[i]->close();
+                               //delete qualFileNames[i];
+                       //}             
+               //}                     
                
                return count;
        }
@@ -787,8 +874,10 @@ void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& out
                                                        }
                                                }else {
                                                        outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
+                                                       outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
                                                        if(qFileName != ""){
                                                                outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
+                                                               outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
                                                        }                                                       
                                                }
                                        }
@@ -810,11 +899,13 @@ void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& out
                                        groupVector.push_back(group);
                                        
                                        if(allFiles){
+                                               outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
                                                outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
                                                outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + group + ".fasta"));
                                                if(qFileName != ""){
                                                        outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
                                                        outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
+                                                       outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + group + ".qual"));
                                                }                                                       
                                        }
                                }else{  m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  }
@@ -831,12 +922,14 @@ void TrimSeqsCommand::getOligos(vector<string>& outFASTAVec, vector<string>& out
                                for (map<string, int>::iterator itPrime = primers.begin(); itPrime != primers.end(); itPrime++) {
                                        if (groupVector[itPrime->second] != "") { //there is a group for this primer
                                                outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
+                                               outputTypes["fasta"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
                                                outFASTAVec.push_back((outputDir + m->getRootName(m->getSimpleName(fastaFile)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".fasta"));
                                                combos[(groupVector[itBar->second] + "." + groupVector[itPrime->second])] = outFASTAVec.size()-1;
                                                
                                                if(qFileName != ""){
                                                        outQualVec.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
                                                        outputNames.push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
+                                                       outputTypes["qual"].push_back((outputDir + m->getRootName(m->getSimpleName(qFileName)) + groupVector[itBar->second] + "." + groupVector[itPrime->second] + ".qual"));
                                                }
                                        }
                                }