]> git.donarmstrong.com Git - mothur.git/blobdiff - trimflowscommand.cpp
mods to shhh.seqs and trim.flows to fix eye candy output
[mothur.git] / trimflowscommand.cpp
index 5c5d28b0390470ae05833f177fba7f7d1012e051..6ee865516bee6851fadf28f81e70bb249c35562d 100644 (file)
 #include "needlemanoverlap.hpp"
 
 //**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getValidParameters(){ 
+vector<string> TrimFlowsCommand::setParameters(){      
        try {
-               string Array[] =  {"flow", "totalflows", "minflows",
-                       "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise"
-                       "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                       "allfiles", "processors",
-
-
-                       
-//                     "group",
-                       "outputdir","inputdir"
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow);
+               CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
+               CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
+               CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows);
+               CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows);
+               CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
+               CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
+               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
+               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
+               CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
+               CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
+               CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+               CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
-               };
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getValidParameters");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getRequiredParameters(){      
-       try {
-               string Array[] =  {"flow"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getRequiredParameters");
+               m->errorOut(e, "TrimFlowsCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getRequiredFiles(){   
+string TrimFlowsCommand::getHelpString(){      
        try {
-               vector<string> myArray;
-               return myArray;
+               string helpString = "";
+               helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+               helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+               helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getRequiredFiles");
+               m->errorOut(e, "TrimFlowsCommand", "getHelpString");
                exit(1);
        }
 }
@@ -66,6 +61,7 @@ vector<string> TrimFlowsCommand::getRequiredFiles(){
 TrimFlowsCommand::TrimFlowsCommand(){  
        try {
                abort = true; calledHelp = true; 
+               setParameters();
                vector<string> tempOutNames;
                outputTypes["flow"] = tempOutNames;
                outputTypes["fasta"] = tempOutNames;
@@ -75,26 +71,6 @@ TrimFlowsCommand::TrimFlowsCommand(){
                exit(1);
        }
 }
-
-//***************************************************************************************************************
-
-TrimFlowsCommand::~TrimFlowsCommand(){ /*      do nothing      */      }
-
-//***************************************************************************************************************
-
-void TrimFlowsCommand::help(){
-       try {
-               m->mothurOut("The trim.flows command reads a flowgram file and creates .....\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
-               m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n\n");
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "help");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 
 TrimFlowsCommand::TrimFlowsCommand(string option)  {
@@ -105,20 +81,11 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                
                //allow user to run help
                if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       //valid paramters for this command
-                       string AlignArray[] =  {"flow", "totalflows", "minflows",
-                               "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise",
-                               "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                               "allfiles", "processors",
-               
-                               //                      "group",
-                               "outputdir","inputdir"
-                               
-                       };
-                       
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                                               
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -157,21 +124,19 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                                        if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
                                }
                                
-                               
-//                             it = parameters.find("group");
-//                             //user has given a template file
-//                             if(it != parameters.end()){ 
-//                                     path = m->hasPath(it->second);
-//                                     //if the user has not given a path then, add inputdir. else leave path alone.
-//                                     if (path == "") {       parameters["group"] = inputDir + it->second;            }
-//                             }
                        }
                        
                        
                        //check for required parameters
                        flowFileName = validParameter.validFile(parameters, "flow", true);
-                       if (flowFileName == "not found") { m->mothurOut("flow is a required parameter for the trim.flows command."); m->mothurOutEndLine(); abort = true; }
-                       else if (flowFileName == "not open") { abort = true; }  
+                       if (flowFileName == "not found") { 
+                               flowFileName = m->getFlowFile(); 
+                               if (flowFileName != "") {  m->mothurOut("Using " + flowFileName + " as input file for the flow parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("No valid current flow file. You must provide a flow file."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               } 
+                       }else if (flowFileName == "not open") { flowFileName = ""; abort = true; }      
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -184,11 +149,11 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        // ...at some point should added some additional type checking...
                        
                        string temp;
-                       temp = validParameter.validFile(parameters, "minflows", false);         if (temp == "not found") { temp = "360"; }
+                       temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
                        convert(temp, minFlows);  
 
-                       temp = validParameter.validFile(parameters, "totalflows", false);       if (temp == "not found") { temp = "720"; }
-                       convert(temp, totalFlows);  
+                       temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
+                       convert(temp, maxFlows);  
                        
                        
                        temp = validParameter.validFile(parameters, "oligos", true);
@@ -225,12 +190,19 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        convert(temp, tdiffs);
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
                        
-                       temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "T";          }
+                       temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found"){ temp = "T";           }
                        allFiles = m->isTrue(temp);
                        
-                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
-                       convert(temp, processors); 
-                       
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
+       
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
+                       }
+
                        if(oligoFileName == ""){        allFiles = 0;           }
 
                        numFPrimers = 0;
@@ -284,10 +256,12 @@ int TrimFlowsCommand::execute(){
                
                if (m->control_pressed) {  return 0; }                  
                
+               string flowFilesFileName;
+               ofstream output;
+               
                if(allFiles){
                        
-                       ofstream output;
-                       string flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+                       flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
                        m->openOutputFile(flowFilesFileName, output);
 
                        for(int i=0;i<barcodePrimerComboFileNames.size();i++){
@@ -301,22 +275,39 @@ int TrimFlowsCommand::execute(){
                                        if (pFile==NULL) perror ("Error opening file");
                                        else{
                                                fseek (pFile, 0, SEEK_END);
-                                               size=ftell (pFile);
+                                               size=ftell(pFile);
                                                fclose (pFile);
                                        }
 
                                        if(size < 10){
-//                                             m->mothurOut("deleting: " + barcodePrimerComboFileNames[i][j] + '\n');
                                                remove(barcodePrimerComboFileNames[i][j].c_str());
                                        }
                                        else{
-//                                             m->mothurOut("saving: " + barcodePrimerComboFileNames[i][j] + '\n');
                                                output << barcodePrimerComboFileNames[i][j] << endl;
+                                               outputNames.push_back(barcodePrimerComboFileNames[i][j]);
+                                               outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
                                        }
                                }
                        }
                        output.close();
                }
+               else{
+                       flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+                       m->openOutputFile(flowFilesFileName, output);
+                       
+                       output << trimFlowFileName << endl;
+                       
+                       output.close();
+               }
+               outputTypes["flow.files"].push_back(flowFilesFileName);
+               outputNames.push_back(flowFilesFileName);
+               
+//             set fasta file as new current fastafile
+//             string current = "";
+//             itTypes = outputTypes.find("fasta");
+//             if (itTypes != outputTypes.end()) {
+//                     if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+//             }
                
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
@@ -346,20 +337,22 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
 
                if(line->start == 4){
-                       scrapFlowFile << totalFlows << endl;
-                       trimFlowFile << totalFlows << endl;
-                       for(int i=0;i<barcodePrimerComboFileNames.size();i++){
-                               for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
-                                       //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
-                                       ofstream temp;
-                                       m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
-                                       temp << totalFlows << endl;
-                                       temp.close();
-                               }
-                       }                       
+                       scrapFlowFile << maxFlows << endl;
+                       trimFlowFile << maxFlows << endl;
+                       if(allFiles){
+                               for(int i=0;i<barcodePrimerComboFileNames.size();i++){
+                                       for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
+                                               //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                               ofstream temp;
+                                               m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
+                                               temp << maxFlows << endl;
+                                               temp.close();
+                                       }
+                               }                       
+                       }
                }
                
-               FlowData flowData(numFlows, signal, noise, maxHomoP);
+               FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
                
                ofstream fastaFile;
                if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
@@ -371,7 +364,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
 
                int count = 0;
                bool moreSeqs = 1;
-               
+                       
                while(moreSeqs) {
                        
                        int success = 1;
@@ -379,7 +372,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        string trashCode = "";
                        
                        flowData.getNext(flowFile);
-                       flowData.capFlows(totalFlows);  
+                       flowData.capFlows(maxFlows);    
                        
                        Sequence currSeq = flowData.getSequence();
                        if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
@@ -395,7 +388,8 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                }
                        }
                        
-                       int primerIndex, barcodeIndex;
+                       int primerIndex = 0;
+                       int barcodeIndex = 0;
                        
                        if(barcodes.size() != 0){
                                success = stripBarcode(currSeq, barcodeIndex);
@@ -415,25 +409,21 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                success = stripReverse(currSeq);
                                if(!success)                            {       trashCode += 'r';       }
                        }
-                                               
-                               
+
                        if(trashCode.length() == 0){
+                                                       
                                flowData.printFlows(trimFlowFile);
+                       
+                               if(fasta)       {       currSeq.printSequence(fastaFile);       }
                                
-                               if(fasta){
-                                       currSeq.printSequence(fastaFile);
-                               }
-                               
-                               if(barcodes.size() != 0 || primers.size() != 0){
-//                                     string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow";
+                               if(allFiles){
                                        ofstream output;
                                        m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
                                        output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
                                        
                                        flowData.printFlows(output);
                                        output.close();
-                               }
-                               
+                               }                               
                        }
                        else{
                                flowData.printFlows(scrapFlowFile, trashCode);
@@ -541,13 +531,27 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                }
                oligosFile.close();
                
-               //add in potential combos       
+               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
+               
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+                       barcodes[""] = 0;
+                       barcodeNameVector.push_back("");                        
+               }
+               
+               if(primerNameVector.size() == 0){
+                       primers[""] = 0;
+                       primerNameVector.push_back("");                 
+               }
+               
+               
                outFlowFileNames.resize(barcodeNameVector.size());
                for(int i=0;i<outFlowFileNames.size();i++){
                        outFlowFileNames[i].assign(primerNameVector.size(), "");
                }
                
                if(allFiles){
+
                        for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
                                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
 
@@ -562,12 +566,15 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                                fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
                                        }
                                        else{
-                                               comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                               if(barcodeName == ""){
+                                                       comboGroupName = primerNameVector[itPrimer->second];
+                                               }
+                                               else{
+                                                       comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                               }
                                                fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
                                        }
                                        
-                                       outputNames.push_back(fileName);
-                                       outputTypes["flow"].push_back(fileName);
                                        outFlowFileNames[itBar->second][itPrimer->second] = fileName;
                                        
                                        ofstream temp;
@@ -708,7 +715,7 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                
                string rawSequence = seq.getUnaligned();
                int success = pdiffs + 1;       //guilty until proven innocent
-               
+
                //can you find the primer
                for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
                        string oligo = it->first;
@@ -726,7 +733,7 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               if ((pdiffs == 0) || (success == 0)) { return success;  }
+               if ((pdiffs == 0) || (success == 0)) {  return success;  }
                
                else { //try aligning and see if you can find it
                        
@@ -1016,16 +1023,17 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                        }else if (pid == 0){
                                
                                vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
-                               for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
-                                       for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
-                                               tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
-                                               ofstream temp;
-                                               m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
-                                               temp.close();
-                                               
+                               if(allFiles){
+                                       for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
+                                               for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
+                                                       tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                                       ofstream temp;
+                                                       m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
+                                                       temp.close();
+                                                       
+                                               }
                                        }
                                }
-                                               
                                driverCreateTrim(flowFileName,
                                                                 (trimFlowFileName + toString(getpid()) + ".temp"),
                                                                 (scrapFlowFileName + toString(getpid()) + ".temp"),