]> git.donarmstrong.com Git - mothur.git/blobdiff - trimflowscommand.cpp
working on current change
[mothur.git] / trimflowscommand.cpp
index a2c3a88a82f7a0a22a0cc20583290fcf011090d5..ec043360ad1cd553d782d1d67ab531c9a4610778 100644 (file)
 #include "needlemanoverlap.hpp"
 
 //**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getValidParameters(){ 
+vector<string> TrimFlowsCommand::setParameters(){      
        try {
-               string Array[] =  {"flow", "totalflows", "minflows",
-                       "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise"
-                       "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                       "allfiles",
-
-                       
-//                     "group",
-//                     "processors",
-                       "outputdir","inputdir"
+               CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow);
+               CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
+               CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
+               CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows);
+               CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows);
+               CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
+               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+               CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+               CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
+               CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
+               CommandParameter pallfiles("allfiles", "Boolean", "", "t", "", "", "",false,false); parameters.push_back(pallfiles);
+               CommandParameter porder("order", "String", "", "", "", "", "",false,false); parameters.push_back(porder);
+               CommandParameter pfasta("fasta", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfasta);
+               CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+               CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
                
-               };
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
-               return myArray;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getValidParameters");
-               exit(1);
-       }
-}
-
-//**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getRequiredParameters(){      
-       try {
-               string Array[] =  {"flow"};
-               vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
+               vector<string> myArray;
+               for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
                return myArray;
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getRequiredParameters");
+               m->errorOut(e, "TrimFlowsCommand", "setParameters");
                exit(1);
        }
 }
-
 //**********************************************************************************************************************
-
-vector<string> TrimFlowsCommand::getRequiredFiles(){   
+string TrimFlowsCommand::getHelpString(){      
        try {
-               vector<string> myArray;
-               return myArray;
+               string helpString = "";
+               helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+               helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
+               helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
+               return helpString;
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "getRequiredFiles");
+               m->errorOut(e, "TrimFlowsCommand", "getHelpString");
                exit(1);
        }
 }
@@ -65,8 +58,8 @@ vector<string> TrimFlowsCommand::getRequiredFiles(){
 
 TrimFlowsCommand::TrimFlowsCommand(){  
        try {
-               abort = true;
-               //initialize outputTypes
+               abort = true; calledHelp = true; 
+               setParameters();
                vector<string> tempOutNames;
                outputTypes["flow"] = tempOutNames;
                outputTypes["fasta"] = tempOutNames;
@@ -76,51 +69,21 @@ TrimFlowsCommand::TrimFlowsCommand(){
                exit(1);
        }
 }
-
-//***************************************************************************************************************
-
-TrimFlowsCommand::~TrimFlowsCommand(){ /*      do nothing      */      }
-
-//***************************************************************************************************************
-
-void TrimFlowsCommand::help(){
-       try {
-               m->mothurOut("The trim.flows command reads a flowgram file and creates .....\n");
-               m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n");
-               m->mothurOut("For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n\n");
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "help");
-               exit(1);
-       }
-}
-
 //**********************************************************************************************************************
 
 TrimFlowsCommand::TrimFlowsCommand(string option)  {
        try {
                
-               abort = false;
+               abort = false; calledHelp = false;   
                comboStarts = 0;
                
                //allow user to run help
-               if(option == "help") { help(); abort = true; }
+               if(option == "help") { help(); abort = true; calledHelp = true; }
+               else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                else {
-                       //valid paramters for this command
-                       string AlignArray[] =  {"flow", "totalflows", "minflows",
-                               "fasta", "minlength", "maxlength", "maxhomop", "signal", "noise",
-                               "oligos", "pdiffs", "bdiffs", "tdiffs", 
-                               "allfiles",
-               
-                               //                      "group",
-                               //                      "processors",
-                               "outputdir","inputdir"
-                               
-                       };
-                       
-                       vector<string> myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string)));
+                                               
+                       vector<string> myArray = setParameters();
                        
                        OptionParser parser(option);
                        map<string,string> parameters = parser.getParameters();
@@ -136,8 +99,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        //initialize outputTypes
                        vector<string> tempOutNames;
                        outputTypes["flow"] = tempOutNames;
-                       //              outputTypes["fasta"] = tempOutNames;
-                       //              outputTypes["group"] = tempOutNames;
+                       outputTypes["fasta"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -152,29 +114,27 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                                        if (path == "") {       parameters["flow"] = inputDir + it->second;             }
                                }
                                
-//                             it = parameters.find("oligos");
-//                             //user has given a template file
-//                             if(it != parameters.end()){ 
-//                                     path = m->hasPath(it->second);
-//                                     //if the user has not given a path then, add inputdir. else leave path alone.
-//                                     if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
-//                             }
-                               
+                               it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
                                
-//                             it = parameters.find("group");
-//                             //user has given a template file
-//                             if(it != parameters.end()){ 
-//                                     path = m->hasPath(it->second);
-//                                     //if the user has not given a path then, add inputdir. else leave path alone.
-//                                     if (path == "") {       parameters["group"] = inputDir + it->second;            }
-//                             }
                        }
                        
                        
                        //check for required parameters
                        flowFileName = validParameter.validFile(parameters, "flow", true);
-                       if (flowFileName == "not found") { m->mothurOut("flow is a required parameter for the trim.flows command."); m->mothurOutEndLine(); abort = true; }
-                       else if (flowFileName == "not open") { abort = true; }  
+                       if (flowFileName == "not found") { 
+                               flowFileName = m->getFlowFile(); 
+                               if (flowFileName != "") {  m->mothurOut("Using " + flowFileName + " as input file for the flow parameter."); m->mothurOutEndLine(); }
+                               else { 
+                                       m->mothurOut("No valid current flow file. You must provide a flow file."); m->mothurOutEndLine(); 
+                                       abort = true;
+                               } 
+                       }else if (flowFileName == "not open") { flowFileName = ""; abort = true; }      
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
@@ -187,17 +147,17 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        // ...at some point should added some additional type checking...
                        
                        string temp;
-                       temp = validParameter.validFile(parameters, "minflows", false);         if (temp == "not found") { temp = "360"; }
+                       temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
                        convert(temp, minFlows);  
 
-                       temp = validParameter.validFile(parameters, "totalflows", false);       if (temp == "not found") { temp = "720"; }
-                       convert(temp, totalFlows);  
+                       temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
+                       convert(temp, maxFlows);  
                        
                        
                        temp = validParameter.validFile(parameters, "oligos", true);
                        if (temp == "not found")        {       oligoFileName = "";             }
                        else if(temp == "not open")     {       abort = true;                   } 
-                       else                                            {       oligoFileName = temp;   }
+                       else                                            {       oligoFileName = temp;   m->setOligosFile(oligoFileName); }
                        
                        temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
                        else if(m->isTrue(temp))        {       fasta = 1;      }
@@ -210,34 +170,33 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
 
                        temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
                        convert(temp, noise);  
-
-                       temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found"){       temp = "0";             }
-                       convert(temp, minLength); 
-                       
-                       temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found"){       temp = "0";             }
-                       convert(temp, maxLength);
-                       
+       
                        temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
                        convert(temp, bdiffs);
                        
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
                        convert(temp, pdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
+                       temp = validParameter.validFile(parameters, "tdiffs", false);
+                       if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
                        convert(temp, tdiffs);
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
                        
-                       temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found") { temp = "T";          }
-                       allFiles = m->isTrue(temp);
-                       
-//                     temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found") { temp = "1"; }
-//                     convert(temp, processors); 
-                       
-                       if(allFiles && oligoFileName == ""){
-                               m->mothurOut("You selected allfiles, but didn't enter an oligos file.  Ignoring the allfiles request."); m->mothurOutEndLine();
-                               allFiles = 0;
+                       temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
+                       m->setProcessors(temp);
+                       convert(temp, processors);
+       
+                       flowOrder = validParameter.validFile(parameters, "order", false);
+                       if (flowOrder == "not found"){ flowOrder = "TACG";              }
+                       else if(flowOrder.length() != 4){
+                               m->mothurOut("The value of the order option must be four bases long\n");
                        }
 
+                       if(oligoFileName == "") {       allFiles = 0;           }
+                       else                                    {       allFiles = 1;           }
+
+                       numFPrimers = 0;
+                       numRPrimers = 0;
                }
                
        }
@@ -252,7 +211,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
 int TrimFlowsCommand::execute(){
        try{
                
-               if (abort == true) { return 0; }
+               if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
 
                string trimFlowFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "trim.flow";
                outputNames.push_back(trimFlowFileName); outputTypes["flow"].push_back(trimFlowFileName);
@@ -265,9 +224,87 @@ int TrimFlowsCommand::execute(){
                        outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
                }
                
-               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName);
+               vector<unsigned long int> flowFilePos = getFlowFileBreaks();
+               for (int i = 0; i < (flowFilePos.size()-1); i++) {
+                       lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
+               }       
+
+               vector<vector<string> > barcodePrimerComboFileNames;
+               if(oligoFileName != ""){
+                       getOligos(barcodePrimerComboFileNames); 
+               }
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               if(processors == 1){
+                       driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
+               }else{
+                       createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
+               }       
+#else
+               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
+#endif
+               
+               if (m->control_pressed) {  return 0; }                  
+               
+               string flowFilesFileName;
+               ofstream output;
+               
+               if(allFiles){
+                       
+                       flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+                       m->openOutputFile(flowFilesFileName, output);
+
+                       for(int i=0;i<barcodePrimerComboFileNames.size();i++){
+                               for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
+                                       
+                                       FILE * pFile;
+                                       unsigned long int size;
+                                       
+                                       //get num bytes in file
+                                       pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
+                                       if (pFile==NULL) perror ("Error opening file");
+                                       else{
+                                               fseek (pFile, 0, SEEK_END);
+                                               size=ftell(pFile);
+                                               fclose (pFile);
+                                       }
+
+                                       if(size < 10){
+                                               remove(barcodePrimerComboFileNames[i][j].c_str());
+                                       }
+                                       else{
+                                               output << barcodePrimerComboFileNames[i][j] << endl;
+                                               outputNames.push_back(barcodePrimerComboFileNames[i][j]);
+                                               outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
+                                       }
+                               }
+                       }
+                       output.close();
+               }
+               else{
+                       flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
+                       m->openOutputFile(flowFilesFileName, output);
+                       
+                       output << trimFlowFileName << endl;
+                       
+                       output.close();
+               }
+               outputTypes["flow.files"].push_back(flowFilesFileName);
+               outputNames.push_back(flowFilesFileName);
+               
+//             set fasta file as new current fastafile
+//             string current = "";
+//             itTypes = outputTypes.find("fasta");
+//             if (itTypes != outputTypes.end()) {
+//                     if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
+//             }
                
-               return 0;                       
+               m->mothurOutEndLine();
+               m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+               for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
+               m->mothurOutEndLine();
+               
+               return 0;       
        }
        catch(exception& e) {
                m->errorOut(e, "TrimSeqsCommand", "execute");
@@ -277,7 +314,7 @@ int TrimFlowsCommand::execute(){
 
 //***************************************************************************************************************
 
-int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName){
+int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames, linePair* line){
        
        try {
                
@@ -288,48 +325,54 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                ofstream scrapFlowFile;
                m->openOutputFile(scrapFlowFileName, scrapFlowFile);
                scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
-       
-               ifstream flowFile;
-               m->openInputFile(flowFileName, flowFile);
+
+               if(line->start == 4){
+                       scrapFlowFile << maxFlows << endl;
+                       trimFlowFile << maxFlows << endl;
+                       if(allFiles){
+                               for(int i=0;i<barcodePrimerComboFileNames.size();i++){
+                                       for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
+                                               //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                               ofstream temp;
+                                               m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
+                                               temp << maxFlows << endl;
+                                               temp.close();
+                                       }
+                               }                       
+                       }
+               }
+               
+               FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
                
                ofstream fastaFile;
                if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
                
-               vector<vector<string> > barcodePrimerCombos;    
-               if(oligoFileName != ""){        getOligos(barcodePrimerCombos); }
-               
-//             inFASTA.seekg(line->start);
+               ifstream flowFile;
+               m->openInputFile(flowFileName, flowFile);
                
-//             bool done = false;
+               flowFile.seekg(line->start);
+
                int count = 0;
-               
-               while (flowFile) {
+               bool moreSeqs = 1;
+                       
+               while(moreSeqs) {
                        
                        int success = 1;
                        int currentSeqDiffs = 0;
-                       
                        string trashCode = "";
-
-                       FlowData currFlow(flowFile, signal, noise, maxHomoP);
-                       m->gobble(flowFile);
-                       currFlow.capFlows(totalFlows);  
-                       Sequence currSeq = currFlow.getSequence();
                        
+                       flowData.getNext(flowFile);
+                       flowData.capFlows(maxFlows);    
                        
-                       if(!currFlow.hasMinFlows(minFlows)){            //screen to see if sequence is of a minimum number of flows
+                       Sequence currSeq = flowData.getSequence();
+                       if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
                                success = 0;
                                trashCode += 'l';
                        }
                        
-                       if(minLength > 0 || maxLength > 0){                     //screen to see if sequence is above and below a specific number of bases
-                               int seqLength = currFlow.getSeqLength();
-                               if(seqLength < minLength || seqLength > maxLength){
-                                       success = 0;
-                                       trashCode += 'l';
-                               }
-                       }
+                       int primerIndex = 0;
+                       int barcodeIndex = 0;
                        
-                       int primerIndex, barcodeIndex;
                        if(barcodes.size() != 0){
                                success = stripBarcode(currSeq, barcodeIndex);
                                if(success > bdiffs)            {       trashCode += 'b';       }
@@ -348,76 +391,47 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                success = stripReverse(currSeq);
                                if(!success)                            {       trashCode += 'r';       }
                        }
-                                               
-                               
+
                        if(trashCode.length() == 0){
-                               currFlow.printFlows(trimFlowFile);
-                               
-                               if(fasta){
-                                       currFlow.printFASTA(fastaFile);
-                               }
+                                                       
+                               flowData.printFlows(trimFlowFile);
+                       
+                               if(fasta)       {       currSeq.printSequence(fastaFile);       }
                                
-                               if(barcodes.size() != 0 || primers.size() != 0){
-                                       string fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + barcodePrimerCombos[barcodeIndex][primerIndex] + ".flow";
+                               if(allFiles){
                                        ofstream output;
-                                       m->openOutputFileAppend(fileName, output);
-                                       currFlow.printFlows(output);
+                                       m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
+                                       output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
+                                       
+                                       flowData.printFlows(output);
                                        output.close();
-                               }
-                               
+                               }                               
                        }
                        else{
-                               currFlow.printFlows(scrapFlowFile, trashCode);
+                               flowData.printFlows(scrapFlowFile, trashCode);
                        }
                                
-//                             if(barcodes.size() != 0){
-//                                     string thisGroup = groupVector[groupBar];
-//                                     int indexToFastaFile = groupBar;
-//                                     if (primers.size() != 0){
-//                                             //does this primer have a group
-//                                             if (groupVector[groupPrime] != "") {  
-//                                                     thisGroup += "." + groupVector[groupPrime]; 
-//                                                     indexToFastaFile = combos[thisGroup];
-//                                             }
-//                                     }
-//                                     outGroups << currSeq.getName() << '\t' << thisGroup << endl;
-//                                     if(allFiles){
-//                                             ofstream outTemp;
-//                                             m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
-//                                             //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
-//                                             currSeq.printSequence(outTemp);
-//                                             outTemp.close();
-//                                             
-//                                             if(qFileName != ""){
-//                                                     //currQual.printQScores(*qualFileNames[indexToFastaFile]);
-//                                                     ofstream outTemp2;
-//                                                     m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
-//                                                     currQual.printQScores(outTemp2);
-//                                                     outTemp2.close();                                                       
-//                                             }
-//                                     }
-//                             }
-//                     }
-//                     else{
-//                             currSeq.setName(currSeq.getName() + '|' + trashCode);
-//                             currSeq.setUnaligned(origSeq);
-//                             currSeq.setAligned(origSeq);
-//                             currSeq.printSequence(scrapFASTA);
-//                     }
-
                        count++;
                                                
                        //report progress
-                       if((count) % 1000 == 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                       unsigned long int pos = flowFile.tellg();
+
+                       if ((pos == -1) || (pos >= line->end)) { break; }
+#else
+                       if (flowFile.eof()) { break; }
+#endif
                        
                }
                //report progress
-               if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
-               
+               if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                
                trimFlowFile.close();
                scrapFlowFile.close();
                flowFile.close();
+               if(fasta){      fastaFile.close();      }
                
                return count;
        }
@@ -499,13 +513,27 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                }
                oligosFile.close();
                
-               //add in potential combos       
+               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
+               
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+                       barcodes[""] = 0;
+                       barcodeNameVector.push_back("");                        
+               }
+               
+               if(primerNameVector.size() == 0){
+                       primers[""] = 0;
+                       primerNameVector.push_back("");                 
+               }
+               
+               
                outFlowFileNames.resize(barcodeNameVector.size());
                for(int i=0;i<outFlowFileNames.size();i++){
                        outFlowFileNames[i].assign(primerNameVector.size(), "");
                }
                
                if(allFiles){
+
                        for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
                                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
 
@@ -520,13 +548,16 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                                                fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
                                        }
                                        else{
-                                               comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                               if(barcodeName == ""){
+                                                       comboGroupName = primerNameVector[itPrimer->second];
+                                               }
+                                               else{
+                                                       comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                               }
                                                fileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + comboGroupName + ".flow";
                                        }
                                        
-                                       outputNames.push_back(fileName);
-                                       outputTypes["flow"].push_back(fileName);
-                                       outFlowFileNames[itBar->second][itPrimer->second] = comboGroupName;
+                                       outFlowFileNames[itBar->second][itPrimer->second] = fileName;
                                        
                                        ofstream temp;
                                        m->openOutputFile(fileName, temp);
@@ -571,11 +602,9 @@ int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               //              cout << success;
                if ((bdiffs == 0) || (success == 0)) { return success;  }
                
                else { //try aligning and see if you can find it
-                       //                      cout << endl;
                        
                        int maxLength = 0;
                        
@@ -583,7 +612,7 @@ int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
                        if (barcodes.size() > 0) {
                                map<string,int>::iterator it=barcodes.begin();
                                
-                               for(it;it!=barcodes.end();it++){
+                               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
                                        if(it->first.length() > maxLength){
                                                maxLength = it->first.length();
                                        }
@@ -620,7 +649,6 @@ int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
                                oligo = oligo.substr(0,alnLength);
                                temp = temp.substr(0,alnLength);
                                
-                               int newStart=0;
                                int numDiff = countDiffs(oligo, temp);
                                
                                if(numDiff < minDiff){
@@ -669,7 +697,7 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                
                string rawSequence = seq.getUnaligned();
                int success = pdiffs + 1;       //guilty until proven innocent
-               
+
                //can you find the primer
                for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
                        string oligo = it->first;
@@ -687,11 +715,9 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                }
                
                //if you found the barcode or if you don't want to allow for diffs
-               //              cout << success;
-               if ((pdiffs == 0) || (success == 0)) { return success;  }
+               if ((pdiffs == 0) || (success == 0)) {  return success;  }
                
                else { //try aligning and see if you can find it
-                       //                      cout << endl;
                        
                        int maxLength = 0;
                        
@@ -736,11 +762,8 @@ int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
                                oligo = oligo.substr(0,alnLength);
                                temp = temp.substr(0,alnLength);
                                
-                               int newStart=0;
                                int numDiff = countDiffs(oligo, temp);
                                
-                               //                              cout << oligo << '\t' << temp << '\t' << numDiff << endl;                               
-                               
                                if(numDiff < minDiff){
                                        minDiff = numDiff;
                                        minCount = 1;
@@ -887,17 +910,183 @@ int TrimFlowsCommand::countDiffs(string oligo, string seq){
        
 }
 
-//***************************************************************************************************************
-
-
-
+/**************************************************************************************************/
 
+vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
 
+       try{
+                       
+               vector<unsigned long int> filePos;
+               filePos.push_back(0);
+                                       
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (flowFileName.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+                               
+               //estimate file breaks
+               unsigned long int chunkSize = 0;
+               chunkSize = size / processors;
 
+               //file too small to divide by processors
+               if (chunkSize == 0)  {  processors = 1; filePos.push_back(size); return filePos;        }
+               
+               //for each process seekg to closest file break and search for next '>' char. make that the filebreak
+               for (int i = 0; i < processors; i++) {
+                       unsigned long int spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       m->openInputFile(flowFileName, in);
+                       in.seekg(spot);
+                       
+                       string dummy = m->getline(in);
+                       
+                       //there was not another sequence before the end of the file
+                       unsigned long int sanityPos = in.tellg();
+                       
+//                     if (sanityPos == -1) {  break;  }
+//                     else {  filePos.push_back(newSpot);  }
+                       if (sanityPos == -1) {  break;  }
+                       else {  filePos.push_back(sanityPos);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
 
+               ifstream in;
+               m->openInputFile(flowFileName, in);
+               in >> numFlows;
+               m->gobble(in);
+               unsigned long int spot = in.tellg();
+               filePos[0] = spot;
+               in.close();
+               
+               processors = (filePos.size() - 1);
+               
+               return filePos; 
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimSeqsCommand", "getFlowFileBreaks");
+               exit(1);
+       }
+}
 
+/**************************************************************************************************/
 
+int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
 
+       try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+               int process = 1;
+               int exitCommand = 1;
+               processIDS.clear();
+               
+               //loop through and create all the processes you want
+               while (process != processors) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               
+                               vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
+                               if(allFiles){
+                                       for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
+                                               for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
+                                                       tempBarcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                                                       ofstream temp;
+                                                       m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
+                                                       temp.close();
+                                                       
+                                               }
+                                       }
+                               }
+                               driverCreateTrim(flowFileName,
+                                                                (trimFlowFileName + toString(getpid()) + ".temp"),
+                                                                (scrapFlowFileName + toString(getpid()) + ".temp"),
+                                                                (fastaFileName + toString(getpid()) + ".temp"),
+                                                                tempBarcodePrimerComboFileNames, lines[process]);
+
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //parent do my part
+               ofstream temp;
+               m->openOutputFile(trimFlowFileName, temp);
+               temp.close();
 
+               m->openOutputFile(scrapFlowFileName, temp);
+               temp.close();
+               
+               if(fasta){
+                       m->openOutputFile(fastaFileName, temp);
+                       temp.close();
+               }
+               
+               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
 
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+               
+               //append files
+               m->mothurOutEndLine();
+               for(int i=0;i<processIDS.size();i++){
+                       
+                       m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
+                       
+                       m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
+                       remove((trimFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+//                     m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
+
+                       m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
+                       remove((scrapFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+//                     m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
+
+                       if(fasta){
+                               m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
+                               remove((fastaFileName + toString(processIDS[i]) + ".temp").c_str());
+//                             m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
+                       }
+                       if(allFiles){                                           
+                               for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
+                                       for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
+                                               m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
+                                               remove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                       }
+                               }
+                       }
+               }
+               
+               return exitCommand;
+#endif         
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");
+               exit(1);
+       }
+}
 
+//***************************************************************************************************************