git.donarmstrong.com Git - mothur.git/blobdiff - trimflowscommand.cpp
added unix to ifdefs. minor changes while testing 1.24.0.
[mothur.git] / trimflowscommand.cpp
index 6ee865516bee6851fadf28f81e70bb249c35562d..00c4d94e1027c63d8a0c324167bb3d7ec8e4c570 100644 (file)
 #include "trimflowscommand.h"
 #include "needlemanoverlap.hpp"
 
+
 //**********************************************************************************************************************
 vector<string> TrimFlowsCommand::setParameters(){      
        try {
                CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pflow);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(poligos);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "",false,false); parameters.push_back(pmaxhomop);
-               CommandParameter pmaxflows("maxflows", "Number", "", "720", "", "", "",false,false); parameters.push_back(pmaxflows);
-               CommandParameter pminflows("minflows", "Number", "", "360", "", "", "",false,false); parameters.push_back(pminflows);
-               CommandParameter pminlength("minlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pminlength);
-               CommandParameter pmaxlength("maxlength", "Number", "", "0", "", "", "",false,false); parameters.push_back(pmaxlength);
+               CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pmaxflows);
+               CommandParameter pminflows("minflows", "Number", "", "450", "", "", "",false,false); parameters.push_back(pminflows);
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pbdiffs);
-               CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(ptdiffs);
                CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
                CommandParameter psignal("signal", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(psignal);
                CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "",false,false); parameters.push_back(pnoise);
@@ -150,52 +151,50 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                        
                        string temp;
                        temp = validParameter.validFile(parameters, "minflows", false); if (temp == "not found") { temp = "450"; }
-                       convert(temp, minFlows);  
+                       m->mothurConvert(temp, minFlows);  
 
                        temp = validParameter.validFile(parameters, "maxflows", false); if (temp == "not found") { temp = "450"; }
-                       convert(temp, maxFlows);  
+                       m->mothurConvert(temp, maxFlows);  
                        
                        
                        temp = validParameter.validFile(parameters, "oligos", true);
                        if (temp == "not found")        {       oligoFileName = "";             }
                        else if(temp == "not open")     {       abort = true;                   } 
-                       else                                            {       oligoFileName = temp;   }
+                       else                                            {       oligoFileName = temp;   m->setOligosFile(oligoFileName); }
                        
                        temp = validParameter.validFile(parameters, "fasta", false);            if (temp == "not found"){       fasta = 0;              }
                        else if(m->isTrue(temp))        {       fasta = 1;      }
                        
                        temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found"){       temp = "9";             }
-                       convert(temp, maxHomoP);  
+                       m->mothurConvert(temp, maxHomoP);  
 
                        temp = validParameter.validFile(parameters, "signal", false);           if (temp == "not found"){       temp = "0.50";  }
-                       convert(temp, signal);  
+                       m->mothurConvert(temp, signal);  
 
                        temp = validParameter.validFile(parameters, "noise", false);            if (temp == "not found"){       temp = "0.70";  }
-                       convert(temp, noise);  
-
-                       temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found"){       temp = "0";             }
-                       convert(temp, minLength); 
-                       
-                       temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found"){       temp = "0";             }
-                       convert(temp, maxLength);
-                       
+                       m->mothurConvert(temp, noise);  
+       
                        temp = validParameter.validFile(parameters, "bdiffs", false);           if (temp == "not found"){       temp = "0";             }
-                       convert(temp, bdiffs);
+                       m->mothurConvert(temp, bdiffs);
                        
                        temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
-                       convert(temp, pdiffs);
+                       m->mothurConvert(temp, pdiffs);
+                       
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
                        
-                       temp = validParameter.validFile(parameters, "tdiffs", false);
-                       if (temp == "not found"){ int tempTotal = pdiffs + bdiffs;  temp = toString(tempTotal); }
-                       convert(temp, tdiffs);
-                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs;       }
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
                        
-                       temp = validParameter.validFile(parameters, "allfiles", false);         if (temp == "not found"){ temp = "T";           }
-                       allFiles = m->isTrue(temp);
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
-                       convert(temp, processors);
+                       m->mothurConvert(temp, processors);
        
                        flowOrder = validParameter.validFile(parameters, "order", false);
                        if (flowOrder == "not found"){ flowOrder = "TACG";              }
@@ -203,7 +202,8 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                                m->mothurOut("The value of the order option must be four bases long\n");
                        }
 
-                       if(oligoFileName == ""){        allFiles = 0;           }
+                       if(oligoFileName == "") {       allFiles = 0;           }
+                       else                                    {       allFiles = 1;           }
 
                        numFPrimers = 0;
                        numRPrimers = 0;
@@ -211,7 +211,7 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
                
        }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "TrimSeqsCommand");
+               m->errorOut(e, "TrimFlowsCommand", "TrimFlowsCommand");
                exit(1);
        }
 }
@@ -234,25 +234,46 @@ int TrimFlowsCommand::execute(){
                        outputNames.push_back(fastaFileName); outputTypes["fasta"].push_back(fastaFileName);
                }
                
-               vector<unsigned long int> flowFilePos = getFlowFileBreaks();
+               vector<unsigned long long> flowFilePos;
+       #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               flowFilePos = getFlowFileBreaks();
                for (int i = 0; i < (flowFilePos.size()-1); i++) {
                        lines.push_back(new linePair(flowFilePos[i], flowFilePos[(i+1)]));
                }       
-
+       #else
+               ifstream in; m->openInputFile(flowFileName, in); in >> numFlows; in.close();
+       ///////////////////////////////////////// until I fix multiple processors for windows //////////////////        
+               processors = 1;
+       ///////////////////////////////////////// until I fix multiple processors for windows //////////////////                
+               if (processors == 1) {
+                       lines.push_back(new linePair(0, 1000));
+               }else {
+                       int numFlowLines;
+                       flowFilePos = m->setFilePosEachLine(flowFileName, numFlowLines);
+                       flowFilePos.erase(flowFilePos.begin() + 1); numFlowLines--;
+                       
+                       //figure out how many sequences you have to process
+                       int numSeqsPerProcessor = numFlowLines / processors;
+                       cout << numSeqsPerProcessor << '\t' << numFlowLines << endl;
+                       for (int i = 0; i < processors; i++) {
+                               int startIndex =  i * numSeqsPerProcessor;
+                               if(i == (processors - 1)){      numSeqsPerProcessor = numFlowLines - i * numSeqsPerProcessor;   }
+                               lines.push_back(new linePair(flowFilePos[startIndex], numSeqsPerProcessor));
+                               cout << flowFilePos[startIndex] << '\t' << numSeqsPerProcessor << endl;
+                       }
+               }
+       #endif
+               
                vector<vector<string> > barcodePrimerComboFileNames;
                if(oligoFileName != ""){
                        getOligos(barcodePrimerComboFileNames); 
                }
                
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                if(processors == 1){
                        driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
                }else{
                        createProcessesCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames); 
                }       
-#else
-               driverCreateTrim(flowFileName, trimFlowFileName, scrapFlowFileName, fastaFileName, barcodePrimerComboFileNames, lines[0]);
-#endif
                
                if (m->control_pressed) {  return 0; }                  
                
@@ -260,32 +281,34 @@ int TrimFlowsCommand::execute(){
                ofstream output;
                
                if(allFiles){
-                       
+                       set<string> namesAlreadyProcessed;
                        flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
                        m->openOutputFile(flowFilesFileName, output);
 
                        for(int i=0;i<barcodePrimerComboFileNames.size();i++){
                                for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
-                                       
-                                       FILE * pFile;
-                                       unsigned long int size;
-                                       
-                                       //get num bytes in file
-                                       pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
-                                       if (pFile==NULL) perror ("Error opening file");
-                                       else{
-                                               fseek (pFile, 0, SEEK_END);
-                                               size=ftell(pFile);
-                                               fclose (pFile);
-                                       }
-
-                                       if(size < 10){
-                                               remove(barcodePrimerComboFileNames[i][j].c_str());
-                                       }
-                                       else{
-                                               output << barcodePrimerComboFileNames[i][j] << endl;
-                                               outputNames.push_back(barcodePrimerComboFileNames[i][j]);
-                                               outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
+                                       if (namesAlreadyProcessed.count(barcodePrimerComboFileNames[i][j]) == 0) {
+                                               FILE * pFile;
+                                               unsigned long long size;
+                                               
+                                               //get num bytes in file
+                                               pFile = fopen (barcodePrimerComboFileNames[i][j].c_str(),"rb");
+                                               if (pFile==NULL) perror ("Error opening file");
+                                               else{
+                                                       fseek (pFile, 0, SEEK_END);
+                                                       size=ftell(pFile);
+                                                       fclose (pFile);
+                                               }
+                                               
+                                               if(size < 10){
+                                                       m->mothurRemove(barcodePrimerComboFileNames[i][j]);
+                                               }
+                                               else{
+                                                       output << m->getFullPathName(barcodePrimerComboFileNames[i][j]) << endl;
+                                                       outputNames.push_back(barcodePrimerComboFileNames[i][j]);
+                                                       outputTypes["flow"].push_back(barcodePrimerComboFileNames[i][j]);
+                                               }
+                                               namesAlreadyProcessed.insert(barcodePrimerComboFileNames[i][j]);
                                        }
                                }
                        }
@@ -295,7 +318,7 @@ int TrimFlowsCommand::execute(){
                        flowFilesFileName = outputDir + m->getRootName(m->getSimpleName(flowFileName)) + "flow.files";
                        m->openOutputFile(flowFilesFileName, output);
                        
-                       output << trimFlowFileName << endl;
+                       output << m->getFullPathName(trimFlowFileName) << endl;
                        
                        output.close();
                }
@@ -324,10 +347,9 @@ int TrimFlowsCommand::execute(){
 
 //***************************************************************************************************************
 
-int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames, linePair* line){
+int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > thisBarcodePrimerComboFileNames, linePair* line){
        
        try {
-               
                ofstream trimFlowFile;
                m->openOutputFile(trimFlowFileName, trimFlowFile);
                trimFlowFile.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
@@ -335,16 +357,24 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                ofstream scrapFlowFile;
                m->openOutputFile(scrapFlowFileName, scrapFlowFile);
                scrapFlowFile.setf(ios::fixed, ios::floatfield); scrapFlowFile.setf(ios::showpoint);
-
-               if(line->start == 4){
+               
+               ofstream fastaFile;
+               if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
+               
+               ifstream flowFile;
+               m->openInputFile(flowFileName, flowFile);
+               
+               flowFile.seekg(line->start);
+               
+               if(line->start == 0){
+                       flowFile >> numFlows; m->gobble(flowFile);
                        scrapFlowFile << maxFlows << endl;
                        trimFlowFile << maxFlows << endl;
                        if(allFiles){
-                               for(int i=0;i<barcodePrimerComboFileNames.size();i++){
-                                       for(int j=0;j<barcodePrimerComboFileNames[0].size();j++){
-                                               //                              barcodePrimerComboFileNames[i][j] += toString(getpid()) + ".temp";
+                               for(int i=0;i<thisBarcodePrimerComboFileNames.size();i++){
+                                       for(int j=0;j<thisBarcodePrimerComboFileNames[0].size();j++){
                                                ofstream temp;
-                                               m->openOutputFile(barcodePrimerComboFileNames[i][j], temp);
+                                               m->openOutputFile(thisBarcodePrimerComboFileNames[i][j], temp);
                                                temp << maxFlows << endl;
                                                temp.close();
                                        }
@@ -353,52 +383,56 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                }
                
                FlowData flowData(numFlows, signal, noise, maxHomoP, flowOrder);
-               
-               ofstream fastaFile;
-               if(fasta){      m->openOutputFile(fastaFileName, fastaFile);    }
-               
-               ifstream flowFile;
-               m->openInputFile(flowFileName, flowFile);
-               
-               flowFile.seekg(line->start);
-
+               //cout << " driver flowdata address " <<  &flowData  << &flowFile << endl;      
                int count = 0;
                bool moreSeqs = 1;
-                       
+               
+               TrimOligos trimOligos(pdiffs, bdiffs, primers, barcodes, revPrimer);
+               
                while(moreSeqs) {
+                               
+                       if (m->control_pressed) { break; }
                        
                        int success = 1;
                        int currentSeqDiffs = 0;
                        string trashCode = "";
                        
-                       flowData.getNext(flowFile);
+                       flowData.getNext(flowFile); 
+                       //cout << "driver good bit " << flowFile.good() << endl;        
                        flowData.capFlows(maxFlows);    
                        
                        Sequence currSeq = flowData.getSequence();
+                       
                        if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
                                success = 0;
                                trashCode += 'l';
                        }
                        
-                       if(minLength > 0 || maxLength > 0){     //screen to see if sequence is above and below a specific number of bases
-                               int seqLength = currSeq.getNumBases();
-                               if(seqLength < minLength || seqLength > maxLength){
-                                       success = 0;
-                                       trashCode += 'l';
-                               }
-                       }
-                       
                        int primerIndex = 0;
                        int barcodeIndex = 0;
                        
+            if(numLinkers != 0){
+                success = trimOligos.stripLinker(currSeq);
+                if(success > ldiffs)           {       trashCode += 'k';       }
+                else{ currentSeqDiffs += success;  }
+                
+            }
+            
                        if(barcodes.size() != 0){
-                               success = stripBarcode(currSeq, barcodeIndex);
+                               success = trimOligos.stripBarcode(currSeq, barcodeIndex);
                                if(success > bdiffs)            {       trashCode += 'b';       }
                                else{ currentSeqDiffs += success;  }
                        }
                        
+            if(numSpacers != 0){
+                success = trimOligos.stripSpacer(currSeq);
+                if(success > sdiffs)           {       trashCode += 's';       }
+                else{ currentSeqDiffs += success;  }
+                
+            }
+            
                        if(numFPrimers != 0){
-                               success = stripForward(currSeq, primerIndex);
+                               success = trimOligos.stripForward(currSeq, primerIndex);
                                if(success > pdiffs)            {       trashCode += 'f';       }
                                else{ currentSeqDiffs += success;  }
                        }
@@ -406,10 +440,10 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
                        
                        if(numRPrimers != 0){
-                               success = stripReverse(currSeq);
+                               success = trimOligos.stripReverse(currSeq);
                                if(!success)                            {       trashCode += 'r';       }
                        }
-
+                       
                        if(trashCode.length() == 0){
                                                        
                                flowData.printFlows(trimFlowFile);
@@ -418,7 +452,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                                
                                if(allFiles){
                                        ofstream output;
-                                       m->openOutputFileAppend(barcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
+                                       m->openOutputFileAppend(thisBarcodePrimerComboFileNames[barcodeIndex][primerIndex], output);
                                        output.setf(ios::fixed, ios::floatfield); trimFlowFile.setf(ios::showpoint);
                                        
                                        flowData.printFlows(output);
@@ -430,12 +464,12 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        }
                                
                        count++;
-                                               
+                       //cout << "driver" << '\t' << currSeq.getName() << endl;                        
                        //report progress
                        if((count) % 10000 == 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
 
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                       unsigned long int pos = flowFile.tellg();
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                       unsigned long long pos = flowFile.tellg();
 
                        if ((pos == -1) || (pos >= line->end)) { break; }
 #else
@@ -521,6 +555,10 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
 
                                        barcodes[oligo]=indexBarcode; indexBarcode++;
                                        barcodeNameVector.push_back(group);
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
                                }
                                else{
                                        m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
@@ -586,6 +624,8 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                
                numFPrimers = primers.size();
                numRPrimers = revPrimer.size();
+        numLinkers = linker.size();
+        numSpacers = spacer.size();
                
        }
        catch(exception& e) {
@@ -593,352 +633,16 @@ void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
                exit(1);
        }
 }
-
-//***************************************************************************************************************
-
-int TrimFlowsCommand::stripBarcode(Sequence& seq, int& group){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               int success = bdiffs + 1;       //guilty until proven innocent
-               
-               //can you find the barcode
-               for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the barcodes are the same length
-                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
-                               
-                               success = 0;
-                               break;
-                       }
-               }
-               
-               //if you found the barcode or if you don't want to allow for diffs
-               if ((bdiffs == 0) || (success == 0)) { return success;  }
-               
-               else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
-                       Alignment* alignment;
-                       if (barcodes.size() > 0) {
-                               map<string,int>::iterator it=barcodes.begin();
-                               
-                               for(it;it!=barcodes.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+bdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
-                       
-                       //can you find the barcode
-                       int minDiff = 1e6;
-                       int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
-                               string oligo = it->first;
-                               //                              int length = oligo.length();
-                               
-                               if(rawSequence.length() < maxLength){   //let's just assume that the barcodes are the same length
-                                       success = bdiffs + 10;
-                                       break;
-                               }
-                               
-                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
-                               oligo = alignment->getSeqAAln();
-                               string temp = alignment->getSeqBAln();
-                               
-                               int alnLength = oligo.length();
-                               
-                               for(int i=oligo.length()-1;i>=0;i--){
-                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
-                               }
-                               oligo = oligo.substr(0,alnLength);
-                               temp = temp.substr(0,alnLength);
-                               
-                               int numDiff = countDiffs(oligo, temp);
-                               
-                               if(numDiff < minDiff){
-                                       minDiff = numDiff;
-                                       minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
-                                       for(int i=0;i<alnLength;i++){
-                                               if(temp[i] != '-'){
-                                                       minPos++;
-                                               }
-                                       }
-                               }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-                               
-                       }
-                       
-                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = bdiffs + 100; }       //can't tell the difference between multiple barcodes
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
-                               success = minDiff;
-                       }
-                       
-                       if (alignment != NULL) {  delete alignment;  }
-                       
-               }
-               
-               return success;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "stripBarcode");
-               exit(1);
-       }
-       
-}
-
-//***************************************************************************************************************
-
-int TrimFlowsCommand::stripForward(Sequence& seq, int& group){
-       try {
-               
-               string rawSequence = seq.getUnaligned();
-               int success = pdiffs + 1;       //guilty until proven innocent
-
-               //can you find the primer
-               for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
-                       string oligo = it->first;
-                       if(rawSequence.length() < oligo.length()){      //let's just assume that the primers are the same length
-                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
-                               break;  
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(0,oligo.length()))){
-                               group = it->second;
-                               seq.setUnaligned(rawSequence.substr(oligo.length()));
-                               success = 0;
-                               break;
-                       }
-               }
-               
-               //if you found the barcode or if you don't want to allow for diffs
-               if ((pdiffs == 0) || (success == 0)) {  return success;  }
-               
-               else { //try aligning and see if you can find it
-                       
-                       int maxLength = 0;
-                       
-                       Alignment* alignment;
-                       if (primers.size() > 0) {
-                               map<string,int>::iterator it=primers.begin();
-                               
-                               for(it;it!=primers.end();it++){
-                                       if(it->first.length() > maxLength){
-                                               maxLength = it->first.length();
-                                       }
-                               }
-                               alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxLength+pdiffs+1));  
-                               
-                       }else{ alignment = NULL; } 
-                       
-                       //can you find the barcode
-                       int minDiff = 1e6;
-                       int minCount = 1;
-                       int minGroup = -1;
-                       int minPos = 0;
-                       
-                       for(map<string,int>::iterator it=primers.begin();it!=primers.end();it++){
-                               string oligo = it->first;
-                               //                              int length = oligo.length();
-                               
-                               if(rawSequence.length() < maxLength){   
-                                       success = pdiffs + 100;
-                                       break;
-                               }
-                               
-                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
-                               oligo = alignment->getSeqAAln();
-                               string temp = alignment->getSeqBAln();
-                               
-                               int alnLength = oligo.length();
-                               
-                               for(int i=oligo.length()-1;i>=0;i--){
-                                       if(oligo[i] != '-'){    alnLength = i+1;        break;  }
-                               }
-                               oligo = oligo.substr(0,alnLength);
-                               temp = temp.substr(0,alnLength);
-                               
-                               int numDiff = countDiffs(oligo, temp);
-                               
-                               if(numDiff < minDiff){
-                                       minDiff = numDiff;
-                                       minCount = 1;
-                                       minGroup = it->second;
-                                       minPos = 0;
-                                       for(int i=0;i<alnLength;i++){
-                                               if(temp[i] != '-'){
-                                                       minPos++;
-                                               }
-                                       }
-                               }
-                               else if(numDiff == minDiff){
-                                       minCount++;
-                               }
-                               
-                       }
-                       
-                       if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
-                       else if(minCount > 1)   {       success = pdiffs + 10;  }       //can't tell the difference between multiple primers
-                       else{                                                                                                   //use the best match
-                               group = minGroup;
-                               seq.setUnaligned(rawSequence.substr(minPos));
-                               success = minDiff;
-                       }
-                       
-                       if (alignment != NULL) {  delete alignment;  }
-                       
-               }
-               
-               return success;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "stripForward");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-
-bool TrimFlowsCommand::stripReverse(Sequence& seq){
-       try {
-
-               string rawSequence = seq.getUnaligned();
-               bool success = 0;       //guilty until proven innocent
-               
-               for(int i=0;i<numRPrimers;i++){
-                       string oligo = revPrimer[i];
-                       
-                       if(rawSequence.length() < oligo.length()){
-                               success = 0;
-                               break;
-                       }
-                       
-                       if(compareDNASeq(oligo, rawSequence.substr(rawSequence.length()-oligo.length(),oligo.length()))){
-                               seq.setUnaligned(rawSequence.substr(0,rawSequence.length()-oligo.length()));
-                               success = 1;
-                               break;
-                       }
-               }       
-
-               return success;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "stripReverse");
-               exit(1);
-       }
-}
-
-
-//***************************************************************************************************************
-
-bool TrimFlowsCommand::compareDNASeq(string oligo, string seq){
-       try {
-               bool success = 1;
-               int length = oligo.length();
-               
-               for(int i=0;i<length;i++){
-                       
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C')    {       success = 0;    }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       success = 0;    }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       success = 0;    }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       success = 0;    }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       success = 0;    }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       success = 0;    }                       
-                               
-                               if(success == 0)        {       break;   }
-                       }
-                       else{
-                               success = 1;
-                       }
-               }
-               
-               return success;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "compareDNASeq");
-               exit(1);
-       }
-       
-}
-
-//***************************************************************************************************************
-
-int TrimFlowsCommand::countDiffs(string oligo, string seq){
-       try {
-               
-               int length = oligo.length();
-               int countDiffs = 0;
-               
-               for(int i=0;i<length;i++){
-                       
-                       if(oligo[i] != seq[i]){
-                               if(oligo[i] == 'A' || oligo[i] == 'T' || oligo[i] == 'G' || oligo[i] == 'C' || oligo[i] == '-' || oligo[i] == '.')      {       countDiffs++;   }
-                               else if((oligo[i] == 'N' || oligo[i] == 'I') && (seq[i] == 'N'))                                {       countDiffs++;   }
-                               else if(oligo[i] == 'R' && (seq[i] != 'A' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'Y' && (seq[i] != 'C' && seq[i] != 'T'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'M' && (seq[i] != 'C' && seq[i] != 'A'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'K' && (seq[i] != 'T' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'W' && (seq[i] != 'T' && seq[i] != 'A'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'S' && (seq[i] != 'C' && seq[i] != 'G'))                                    {       countDiffs++;   }
-                               else if(oligo[i] == 'B' && (seq[i] != 'C' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'D' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'G'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'H' && (seq[i] != 'A' && seq[i] != 'T' && seq[i] != 'C'))   {       countDiffs++;   }
-                               else if(oligo[i] == 'V' && (seq[i] != 'A' && seq[i] != 'C' && seq[i] != 'G'))   {       countDiffs++;   }       
-                       }
-                       
-               }
-               
-               return countDiffs;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "countDiffs");
-               exit(1);
-       }
-       
-}
-
 /**************************************************************************************************/
-
-vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
+vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
 
        try{
                        
-               vector<unsigned long int> filePos;
+               vector<unsigned long long> filePos;
                filePos.push_back(0);
                                        
                FILE * pFile;
-               unsigned long int size;
+               unsigned long long size;
                
                //get num bytes in file
                pFile = fopen (flowFileName.c_str(),"rb");
@@ -950,7 +654,7 @@ vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
                }
                                
                //estimate file breaks
-               unsigned long int chunkSize = 0;
+               unsigned long long chunkSize = 0;
                chunkSize = size / processors;
 
                //file too small to divide by processors
@@ -958,7 +662,7 @@ vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
                
                //for each process seekg to closest file break and search for next '>' char. make that the filebreak
                for (int i = 0; i < processors; i++) {
-                       unsigned long int spot = (i+1) * chunkSize;
+                       unsigned long long spot = (i+1) * chunkSize;
                        
                        ifstream in;
                        m->openInputFile(flowFileName, in);
@@ -967,7 +671,7 @@ vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
                        string dummy = m->getline(in);
                        
                        //there was not another sequence before the end of the file
-                       unsigned long int sanityPos = in.tellg();
+                       unsigned long long sanityPos = in.tellg();
                        
 //                     if (sanityPos == -1) {  break;  }
 //                     else {  filePos.push_back(newSpot);  }
@@ -989,8 +693,8 @@ vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
                m->openInputFile(flowFileName, in);
                in >> numFlows;
                m->gobble(in);
-               unsigned long int spot = in.tellg();
-               filePos[0] = spot;
+               //unsigned long long spot = in.tellg();
+               //filePos[0] = spot;
                in.close();
                
                processors = (filePos.size() - 1);
@@ -1008,10 +712,11 @@ vector<unsigned long int> TrimFlowsCommand::getFlowFileBreaks() {
 int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trimFlowFileName, string scrapFlowFileName, string fastaFileName, vector<vector<string> > barcodePrimerComboFileNames){
 
        try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-               int process = 1;
-               int exitCommand = 1;
                processIDS.clear();
+               int exitCommand = 1;
+               
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               int process = 1;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -1068,7 +773,85 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                        int temp = processIDS[i];
                        wait(&temp);
                }
+#else
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //The Windows version uses threads, which share memory, so be careful when passing variables through the trimFlowData struct.
+               //The fork() used above clones the process, so its memory is separate, but that is not the case on Windows.
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<trimFlowData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       // Allocate memory for thread data.
+                       string extension = "";
+                       if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
+                       
+                       vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
+                       if(allFiles){
+                               for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
+                                       for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
+                                               tempBarcodePrimerComboFileNames[i][j] += extension;
+                                               ofstream temp;
+                                               m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
+                                               temp.close();
+                                               
+                                       }
+                               }
+                       }
+                       
+                       trimFlowData* tempflow = new trimFlowData(flowFileName, (trimFlowFileName + extension), (scrapFlowFileName + extension), fastaFileName, flowOrder, tempBarcodePrimerComboFileNames, barcodes, primers, revPrimer, fasta, allFiles, lines[i]->start, lines[i]->end, m, signal, noise, numFlows, maxFlows, minFlows, maxHomoP, tdiffs, bdiffs, pdiffs, i);
+                       pDataArray.push_back(tempflow);
+                       
+                       //MyTrimFlowThreadFunction is in header. It must be global or static to work with the threads.
+                       //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+                       hThreadArray[i] = CreateThread(NULL, 0, MyTrimFlowThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+               //using the main process as a worker saves time and memory
+               ofstream temp;
+               m->openOutputFile(trimFlowFileName, temp);
+               temp.close();
+               
+               m->openOutputFile(scrapFlowFileName, temp);
+               temp.close();
+               
+               if(fasta){
+                       m->openOutputFile(fastaFileName, temp);
+                       temp.close();
+               }
+               
+               vector<vector<string> > tempBarcodePrimerComboFileNames = barcodePrimerComboFileNames;
+               if(allFiles){
+                       for(int i=0;i<tempBarcodePrimerComboFileNames.size();i++){
+                               for(int j=0;j<tempBarcodePrimerComboFileNames[0].size();j++){
+                                       tempBarcodePrimerComboFileNames[i][j] += toString(processors-1) + ".temp";
+                                       ofstream temp;
+                                       m->openOutputFile(tempBarcodePrimerComboFileNames[i][j], temp);
+                                       temp.close();
+                                       
+                               }
+                       }
+               }
+               
+               //do my part - do last piece because windows is looking for eof
+               int num = driverCreateTrim(flowFileName, (trimFlowFileName  + toString(processors-1) + ".temp"), (scrapFlowFileName  + toString(processors-1) + ".temp"), (fastaFileName + toString(processors-1) + ".temp"), tempBarcodePrimerComboFileNames, lines[processors-1]);
+               processIDS.push_back((processors-1)); 
+               
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+                       CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+               }
                
+               
+#endif 
                //append files
                m->mothurOutEndLine();
                for(int i=0;i<processIDS.size();i++){
@@ -1076,30 +859,30 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                        m->mothurOut("Appending files from process " + toString(processIDS[i])); m->mothurOutEndLine();
                        
                        m->appendFiles((trimFlowFileName + toString(processIDS[i]) + ".temp"), trimFlowFileName);
-                       remove((trimFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((trimFlowFileName + toString(processIDS[i]) + ".temp"));
 //                     m->mothurOut("\tDone with trim.flow file"); m->mothurOutEndLine();
 
                        m->appendFiles((scrapFlowFileName + toString(processIDS[i]) + ".temp"), scrapFlowFileName);
-                       remove((scrapFlowFileName + toString(processIDS[i]) + ".temp").c_str());
+                       m->mothurRemove((scrapFlowFileName + toString(processIDS[i]) + ".temp"));
 //                     m->mothurOut("\tDone with scrap.flow file"); m->mothurOutEndLine();
 
                        if(fasta){
                                m->appendFiles((fastaFileName + toString(processIDS[i]) + ".temp"), fastaFileName);
-                               remove((fastaFileName + toString(processIDS[i]) + ".temp").c_str());
+                               m->mothurRemove((fastaFileName + toString(processIDS[i]) + ".temp"));
 //                             m->mothurOut("\tDone with flow.fasta file"); m->mothurOutEndLine();
                        }
                        if(allFiles){                                           
                                for (int j = 0; j < barcodePrimerComboFileNames.size(); j++) {
                                        for (int k = 0; k < barcodePrimerComboFileNames[0].size(); k++) {
                                                m->appendFiles((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"), barcodePrimerComboFileNames[j][k]);
-                                               remove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp").c_str());
+                                               m->mothurRemove((barcodePrimerComboFileNames[j][k] + toString(processIDS[i]) + ".temp"));
                                        }
                                }
                        }
                }
                
                return exitCommand;
-#endif         
+       
        }
        catch(exception& e) {
                m->errorOut(e, "TrimFlowsCommand", "createProcessesCreateTrim");