]> git.donarmstrong.com Git - mothur.git/commitdiff
added oligos, group, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs to fastq.info so you...
authorSarah Westcott <mothur.westcott@gmail.com>
Tue, 3 Dec 2013 19:51:46 +0000 (14:51 -0500)
committerSarah Westcott <mothur.westcott@gmail.com>
Tue, 3 Dec 2013 19:51:46 +0000 (14:51 -0500)
commandfactory.cpp
makecontigscommand.cpp
parsefastaqcommand.cpp
parsefastaqcommand.h
sffinfocommand.cpp
sffinfocommand.h
sracommand.cpp
sracommand.h

index c247f908ca190087e4a30bb05ea31d88a0e7fd6b..5a32ac1a526450b8d9408a88e0e57cc31e946720 100644 (file)
 #include "makelefsecommand.h"
 #include "lefsecommand.h"
 #include "kruskalwalliscommand.h"
 #include "makelefsecommand.h"
 #include "lefsecommand.h"
 #include "kruskalwalliscommand.h"
+#include "sracommand.h"
 
 /*******************************************************/
 
 
 /*******************************************************/
 
@@ -319,6 +320,7 @@ CommandFactory::CommandFactory(){
     commands["make.lefse"]          = "make.lefse";
     commands["lefse"]               = "lefse";
     commands["kruskal.wallis"]      = "kruskal.wallis";
     commands["make.lefse"]          = "make.lefse";
     commands["lefse"]               = "lefse";
     commands["kruskal.wallis"]      = "kruskal.wallis";
+    commands["sra"]                 = "sra";
     
 
 }
     
 
 }
@@ -533,7 +535,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
         else if(commandName == "load.logfile")          {      command = new LoadLogfileCommand(optionString);             }
         else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
         else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
         else if(commandName == "load.logfile")          {      command = new LoadLogfileCommand(optionString);             }
         else if(commandName == "sff.multiple")          {      command = new SffMultipleCommand(optionString);             }
-        else if(commandName == "classify.rf")           {      command = new ClassifyRFSharedCommand(optionString);          }
+        else if(commandName == "classify.rf")           {      command = new ClassifyRFSharedCommand(optionString);        }
         else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
         else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
         else if(commandName == "filter.shared")         {      command = new FilterSharedCommand(optionString);            }
         else if(commandName == "primer.design")         {      command = new PrimerDesignCommand(optionString);            }
         else if(commandName == "get.dists")             {      command = new GetDistsCommand(optionString);                }
@@ -546,6 +548,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.lefse")                   {       command = new MakeLefseCommand(optionString);                           }
         else if(commandName == "lefse")                 {      command = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      command = new KruskalWallisCommand(optionString);           }
         else if(commandName == "make.lefse")                   {       command = new MakeLefseCommand(optionString);                           }
         else if(commandName == "lefse")                 {      command = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      command = new KruskalWallisCommand(optionString);           }
+        else if(commandName == "sra")                   {      command = new SRACommand(optionString);                     }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -714,6 +717,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "make.lefse")                   {       pipecommand = new MakeLefseCommand(optionString);                               }
         else if(commandName == "lefse")                 {      pipecommand = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      pipecommand = new KruskalWallisCommand(optionString);           }
         else if(commandName == "make.lefse")                   {       pipecommand = new MakeLefseCommand(optionString);                               }
         else if(commandName == "lefse")                 {      pipecommand = new LefseCommand(optionString);                   }
         else if(commandName == "kruskal.wallis")        {      pipecommand = new KruskalWallisCommand(optionString);           }
+        else if(commandName == "sra")                   {      pipecommand = new SRACommand(optionString);                     }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -868,6 +872,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "make.lefse")                   {       shellcommand = new MakeLefseCommand();                          }
         else if(commandName == "lefse")                 {      shellcommand = new LefseCommand();                  }
         else if(commandName == "kruskal.wallis")        {      shellcommand = new KruskalWallisCommand();          }
         else if(commandName == "make.lefse")                   {       shellcommand = new MakeLefseCommand();                          }
         else if(commandName == "lefse")                 {      shellcommand = new LefseCommand();                  }
         else if(commandName == "kruskal.wallis")        {      shellcommand = new KruskalWallisCommand();          }
+        else if(commandName == "sra")                   {      shellcommand = new SRACommand();                    }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index bf27d264ec5f06f15a3d25b7ac8c5952cd03e17a..8796ab2113f455b12c8eace90df64d4bbe2c318e 100644 (file)
@@ -1977,7 +1977,6 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, stri
                     else { uniquePrimers.insert(tempPair); }
                                        
                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
                     else { uniquePrimers.insert(tempPair); }
                                        
                     if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
-                    
                                        primers[indexPrimer]=newPrimer; indexPrimer++;          
                                        primerNameVector.push_back(group);
                                }else if(type == "BARCODE"){
                                        primers[indexPrimer]=newPrimer; indexPrimer++;          
                                        primerNameVector.push_back(group);
                                }else if(type == "BARCODE"){
index 051c1dfb057c0148c3f0053f94707b7e6d034dc0..c3bd3933836c3f67149a96b9dba1661978b31bc4 100644 (file)
 vector<string> ParseFastaQCommand::setParameters(){    
        try {
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
 vector<string> ParseFastaQCommand::setParameters(){    
        try {
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
+        CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
                CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
         CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
                CommandParameter pfasta("fasta", "Boolean", "", "T", "", "", "","fasta",false,false); parameters.push_back(pfasta);
                CommandParameter pqual("qfile", "Boolean", "", "T", "", "", "","qfile",false,false); parameters.push_back(pqual);
         CommandParameter ppacbio("pacbio", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppacbio);
@@ -35,8 +42,15 @@ string ParseFastaQCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
        try {
                string helpString = "";
                helpString += "The fastq.info command reads a fastq file and creates a fasta and quality file.\n";
-               helpString += "The fastq.info command parameters are fastq, fasta, qfile and format; fastq is required.\n";
+               helpString += "The fastq.info command parameters are fastq, fasta, qfile, oligos, group and format; fastq is required.\n";
         helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
         helpString += "The fastq.info command should be in the following format: fastq.info(fastaq=yourFastaQFile).\n";
+        helpString += "The oligos parameter allows you to provide an oligos file to split your fastq file into separate fastq files by barcode and primers. \n";
+        helpString += "The group parameter allows you to provide a group file to split your fastq file into separate fastq files by group. \n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the reads. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
                helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
@@ -56,7 +70,8 @@ string ParseFastaQCommand::getOutputPattern(string type) {
         string pattern = "";
         
         if (type == "fasta") {  pattern = "[filename],fasta"; } 
         string pattern = "";
         
         if (type == "fasta") {  pattern = "[filename],fasta"; } 
-        else if (type == "qfile") {  pattern = "[filename],qual"; } 
+        else if (type == "qfile") {  pattern = "[filename],qual"; }
+        else if (type == "fastq") {  pattern = "[filename],[group],fastq"; } 
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
         
         return pattern;
@@ -74,6 +89,7 @@ ParseFastaQCommand::ParseFastaQCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["fastq"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
        }
        catch(exception& e) {
                m->errorOut(e, "ParseFastaQCommand", "ParseFastaQCommand");
@@ -83,7 +99,8 @@ ParseFastaQCommand::ParseFastaQCommand(){
 //**********************************************************************************************************************
 ParseFastaQCommand::ParseFastaQCommand(string option){
        try {
 //**********************************************************************************************************************
 ParseFastaQCommand::ParseFastaQCommand(string option){
        try {
-               abort = false; calledHelp = false;   
+               abort = false; calledHelp = false;
+        split = 1;
                
                if(option == "help") {  help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
                
                if(option == "help") {  help(); abort = true; calledHelp = true; }
                else if(option == "citation") { citation(); abort = true; calledHelp = true;}
@@ -106,6 +123,7 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+            outputTypes["fastq"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
                        string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -119,12 +137,40 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
                                }
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
                                }
+                
+                it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
+                               }
+                
+                it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
                        }
                        
                        //check for required parameters
                        fastaQFile = validParameter.validFile(parameters, "fastq", true);
                        if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
                        }
                        
                        //check for required parameters
                        fastaQFile = validParameter.validFile(parameters, "fastq", true);
                        if (fastaQFile == "not found") {        m->mothurOut("fastq is a required parameter for the fastq.info command.");      m->mothurOutEndLine();  abort = true;   }
-                       else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }       
+                       else if (fastaQFile == "not open")      {       fastaQFile = ""; abort = true;  }
+            
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+                       if (oligosfile == "not found") {        oligosfile = "";        }
+                       else if (oligosfile == "not open")      {       oligosfile = ""; abort = true;  }
+            else { m->setOligosFile(oligosfile); split = 2; }
+            
+            groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not found") { groupfile = ""; }
+                       else if (groupfile == "not open")       {       groupfile = ""; abort = true;   }
+            else { m->setGroupFile(groupfile); split = 2; }
+            
+            if ((groupfile != "") && (oligosfile != "")) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;  }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
                        
                        //if the user changes the output directory command factory will send this info to us in the output parameter 
                        outputDir = validParameter.validFile(parameters, "outputdir", false);   if (outputDir == "not found"){  outputDir = m->hasPath(fastaQFile);     }
@@ -139,6 +185,23 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
             temp = validParameter.validFile(parameters, "pacbio", false);      if(temp == "not found"){        temp = "F";     }
                        pacbio = m->isTrue(temp);
 
             temp = validParameter.validFile(parameters, "pacbio", false);      if(temp == "not found"){        temp = "F";     }
                        pacbio = m->isTrue(temp);
 
+            temp = validParameter.validFile(parameters, "bdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, bdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, pdiffs);
+            
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
+                       
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+
                        
             format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
             
                        
             format = validParameter.validFile(parameters, "format", false);            if (format == "not found"){     format = "sanger";      }
             
@@ -171,6 +234,18 @@ int ParseFastaQCommand::execute(){
                
                if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
                if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
                
                if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
                if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
+        
+        TrimOligos* trimOligos = NULL;
+        int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+        if (oligosfile != "")       {
+            readOligos(oligosfile);
+            numPrimers = primers.size(); numBarcodes = barcodes.size();
+            //find group read belongs to
+            if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
+            else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
+
+        }
+        else if (groupfile != "")   { readGroup(groupfile);     }
                
                ifstream in;
                m->openInputFile(fastaQFile, in);
                
                ifstream in;
                m->openInputFile(fastaQFile, in);
@@ -181,65 +256,100 @@ int ParseFastaQCommand::execute(){
             convertTable.push_back(temp);
         }
                
             convertTable.push_back(temp);
         }
                
+        
+        int count = 0;
                while (!in.eof()) {
                        
                        if (m->control_pressed) { break; }
                while (!in.eof()) {
                        
                        if (m->control_pressed) { break; }
-               
-                       //read sequence name
-                       string name = m->getline(in); m->gobble(in);
-                       if (name == "") {  m->mothurOut("[ERROR]: Blank fasta name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { 
-                name = name.substr(1); 
-                m->checkName(name);
-            }
-                       
-                       //read sequence
-                       string sequence = m->getline(in); m->gobble(in);
-                       if (sequence == "") {  m->mothurOut("[ERROR]: missing sequence for " + name); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //read sequence name
-                       string name2 = m->getline(in); m->gobble(in);
-                       if (name2 == "") {  m->mothurOut("[ERROR]: Blank quality name."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       else { 
-                name2 = name2.substr(1);  
-                m->checkName(name2);
-            }
-                       
-                       //read quality scores
-                       string quality = m->getline(in); m->gobble(in);
-                       if (quality == "") {  m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-                       //sanity check sequence length and number of quality scores match
-                       if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
-                       if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-                       
-            vector<int> qualScores;
-            if (qual) {
-                               qualScores = convertQual(quality);
-                               outQual << ">" << name << endl;
-                               for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
-                               outQual << endl;
-                       }
             
             
-            if (m->control_pressed) { break; }
+            bool ignore;
+            fastqRead2 thisRead = readFastq(in, ignore);
             
             
-            if (pacbio) {
-                if (!qual) { qualScores = convertQual(quality); } //get scores if we didn't already
-                for (int i = 0; i < qualScores.size(); i++) {
-                    if (qualScores[i] == 0){ sequence[i] = 'N'; }
+            if (!ignore) {
+                vector<int> qualScores;
+                if (qual) {
+                    qualScores = convertQual(thisRead.quality);
+                    outQual << ">" << thisRead.seq.getName() << endl;
+                    for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+                    outQual << endl;
                 }
                 }
-            }
-            
-                       //print sequence info to files
-                       if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
-                       
+                
+                if (m->control_pressed) { break; }
+                
+                if (pacbio) {
+                    if (!qual) { qualScores = convertQual(thisRead.quality); } //convert if not done
+                    string sequence = thisRead.seq.getAligned();
+                    for (int i = 0; i < qualScores.size(); i++) {
+                        if (qualScores[i] == 0){ sequence[i] = 'N'; }
+                    }
+                    thisRead.seq.setAligned(sequence);
+                }
+                
+                //print sequence info to files
+                if (fasta) { thisRead.seq.printSequence(outFasta); }
+                
+                if (split > 1) {
+                    int barcodeIndex, primerIndex, trashCodeLength;
+                    if (oligosfile != "")      {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers);    }
+                    else if (groupfile != "")  {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode");   }
+                    else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
+                    
+                    if(trashCodeLength == 0){
+                        ofstream out;
+                        m->openOutputFileAppend(fastqFileNames[barcodeIndex][primerIndex], out);
+                        out << thisRead.wholeRead;
+                        out.close();
+                    }else{
+                        ofstream out;
+                        m->openOutputFileAppend(noMatchFile, out);
+                        out << thisRead.wholeRead;
+                        out.close();
+                    }
+                }
+                //report progress
+                if((count+1) % 10000 == 0){    m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }
+                if(count > 100000){    break;  }
+                count++;
+                       }
                }
                
                in.close();
                if (fasta)      { outFasta.close();     }
                if (qual)       { outQual.close();      }
                }
                
                in.close();
                if (fasta)      { outFasta.close();     }
                if (qual)       { outQual.close();      }
+        
+        //report progress
+               if (!m->control_pressed) {   if((count) % 10000 != 0){  m->mothurOut(toString(count)); m->mothurOutEndLine();           }  }
+        
+        if (split > 1) {
+            
+            if (groupfile != "")        { delete groupMap;      }
+            else if (oligosfile != "")  { delete trimOligos;    }
+           
+                       map<string, string>::iterator it;
+                       set<string> namesToRemove;
+                       for(int i=0;i<fastqFileNames.size();i++){
+                               for(int j=0;j<fastqFileNames[0].size();j++){
+                                       if (fastqFileNames[i][j] != "") {
+                                               if (namesToRemove.count(fastqFileNames[i][j]) == 0) {
+                                                       if(m->isBlank(fastqFileNames[i][j])){
+                                                               m->mothurRemove(fastqFileNames[i][j]);
+                                                               namesToRemove.insert(fastqFileNames[i][j]);
+                            }
+                                               }
+                                       }
+                               }
+                       }
+            
+                       //remove names for outputFileNames, just cleans up the output
+                       for(int i = 0; i < outputNames.size(); i++) {
+                if (namesToRemove.count(outputNames[i]) != 0) {
+                    outputNames.erase(outputNames.begin()+i);
+                    i--;
+                }
+            }
+            if(m->isBlank(noMatchFile)){  m->mothurRemove(noMatchFile); }
+            else { outputNames.push_back(noMatchFile); outputTypes["fastq"].push_back(noMatchFile); }
+        }
                
                if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                
                
                if (m->control_pressed) { outputTypes.clear(); outputNames.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
                
@@ -267,6 +377,55 @@ int ParseFastaQCommand::execute(){
                exit(1);
        }
 }
                exit(1);
        }
 }
+//**********************************************************************************************************************
+fastqRead2 ParseFastaQCommand::readFastq(ifstream& in, bool& ignore){
+    try {
+        ignore = false;
+        string wholeRead = "";
+        
+        //read sequence name
+        string line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        vector<string> pieces = m->splitWhiteSpace(line);
+        string name = "";  if (pieces.size() != 0) { name = pieces[0]; }
+        if (name == "") {  m->mothurOut("[WARNING]: Blank fasta name, ignoring read."); m->mothurOutEndLine(); ignore=true;  }
+        else if (name[0] != '@') { m->mothurOut("[WARNING]: reading " + name + " expected a name with @ as a leading character, ignoring read."); m->mothurOutEndLine(); ignore=true; }
+        else { name = name.substr(1); }
+        
+        //read sequence
+        string sequence = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += sequence + "\n"; }
+        if (sequence == "") {  m->mothurOut("[WARNING]: missing sequence for " + name + ", ignoring."); ignore=true; }
+        
+        //read sequence name
+        line = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += line + "\n"; }
+        pieces = m->splitWhiteSpace(line);
+        string name2 = "";  if (pieces.size() != 0) { name2 = pieces[0]; }
+        if (name2 == "") {  m->mothurOut("[WARNING]: expected a name with + as a leading character, ignoring."); ignore=true; }
+        else if (name2[0] != '+') { m->mothurOut("[WARNING]: reading " + name2 + " expected a name with + as a leading character, ignoring."); ignore=true; }
+        else { name2 = name2.substr(1); if (name2 == "") { name2 = name; } }
+        
+                
+        //read quality scores
+        string quality = m->getline(in); m->gobble(in); if (split > 1) { wholeRead += quality + "\n"; }
+        if (quality == "") {  m->mothurOut("[WARNING]: missing quality for " + name2 + ", ignoring."); ignore=true; }
+        
+        //sanity check sequence length and number of quality scores match
+        if (name2 != "") { if (name != name2) { m->mothurOut("[WARNING]: names do not match. read " + name + " for fasta and " + name2 + " for quality, ignoring."); ignore=true; } }
+        if (quality.length() != sequence.length()) { m->mothurOut("[WARNING]: Lengths do not match for sequence " + name + ". Read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores, ignoring read."); ignore=true; }
+        
+        m->checkName(name);
+        Sequence seq(name, sequence);
+        fastqRead2 read(seq, quality, wholeRead);
+            
+        if (m->debug) { m->mothurOut("[DEBUG]: " + read.seq.getName() + " " + read.seq.getAligned() + " " + quality + "\n"); }
+        
+        return read;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ParseFastaQCommand", "readFastq");
+        exit(1);
+    }
+}
+
 //**********************************************************************************************************************
 vector<int> ParseFastaQCommand::convertQual(string qual) {
        try {
 //**********************************************************************************************************************
 vector<int> ParseFastaQCommand::convertQual(string qual) {
        try {
@@ -302,6 +461,446 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
        }
 }
 //**********************************************************************************************************************
        }
 }
 //**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+       try {
+        int success = 1;
+        string trashCode = "";
+        int currentSeqsDiffs = 0;
+        
+        Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
+        QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
+        
+        if(linker.size() != 0){
+            success = trimOligos->stripLinker(currSeq, currQual);
+            if(success > ldiffs)               {       trashCode += 'k';       }
+            else{ currentSeqsDiffs += success;  }
+            
+        }
+        
+        if(numBarcodes != 0){
+            success = trimOligos->stripBarcode(currSeq, currQual, barcode);
+            if(success > bdiffs)               {       trashCode += 'b';       }
+            else{ currentSeqsDiffs += success;  }
+        }
+        
+        if(spacer.size() != 0){
+            success = trimOligos->stripSpacer(currSeq, currQual);
+            if(success > sdiffs)               {       trashCode += 's';       }
+            else{ currentSeqsDiffs += success;  }
+            
+        }
+        
+        if(numPrimers != 0){
+            success = trimOligos->stripForward(currSeq, currQual, primer, true);
+            if(success > pdiffs)               {       trashCode += 'f';       }
+            else{ currentSeqsDiffs += success;  }
+        }
+        
+        if (currentSeqsDiffs > tdiffs) {       trashCode += 't';   }
+        
+        if(revPrimer.size() != 0){
+            success = trimOligos->stripReverse(currSeq, currQual);
+            if(!success)                               {       trashCode += 'r';       }
+        }
+        
+        
+        return trashCode.length();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "findGroup");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, string groupMode) {
+       try {
+        string trashCode = "";
+        primer = 0;
+        
+        string group = groupMap->getGroup(thisRead.seq.getName());
+        if (group == "not found") {     trashCode += "g";   } //scrap for group
+        else { //find file group
+            map<string, int>::iterator it = barcodes.find(group);
+            if (it != barcodes.end()) {
+                barcode = it->second;
+            }else { trashCode += "g"; }
+        }
+        
+        return trashCode.length();
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "findGroup");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+bool ParseFastaQCommand::readOligos(string oligoFile){
+       try {
+               ifstream inOligos;
+               m->openInputFile(oligoFile, inOligos);
+               
+               string type, oligo, roligo, group;
+        bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
+        
+               int indexPrimer = 0;
+               int indexBarcode = 0;
+        int indexPairedPrimer = 0;
+               int indexPairedBarcode = 0;
+        set<string> uniquePrimers;
+        set<string> uniqueBarcodes;
+               
+               while(!inOligos.eof()){
+            
+                       inOligos >> type;
+            
+                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+            
+                       if(type[0] == '#'){
+                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               m->gobble(inOligos);
+                       }
+                       else{
+                               m->gobble(inOligos);
+                               //make type case insensitive
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
+                               
+                               inOligos >> oligo;
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+                               
+                               for(int i=0;i<oligo.length();i++){
+                                       oligo[i] = toupper(oligo[i]);
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
+                               }
+                               
+                               if(type == "FORWARD"){
+                                       group = "";
+                                       
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                                       
+                                       //check for repeat barcodes
+                                       map<string, int>::iterator itPrime = primers.find(oligo);
+                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
+                    
+                                       primers[oligo]=indexPrimer; indexPrimer++;
+                                       primerNameVector.push_back(group);
+                               }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+                                       
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    roligo = reverseOligo(roligo);
+                    
+                    group = "";
+                    
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                    
+                    oligosPair newPrimer(oligo, roligo);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+                                       
+                                       //check for repeat barcodes
+                    string tempPair = oligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
+                    else { uniquePrimers.insert(tempPair); }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
+                    
+                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+                                       primerNameVector.push_back(group);
+                    hasPrimer = true;
+                }
+                               else if(type == "REVERSE"){
+                                       //Sequence oligoRC("reverse", oligo);
+                                       //oligoRC.reverseComplement();
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
+                               }
+                               else if(type == "BARCODE"){
+                                       inOligos >> group;
+                    
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                    
+                    string temp = "";
+                    while (!inOligos.eof())    {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  temp += c;  }
+                                       }
+                                       
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {
+                        hasPairedBarcodes = true;
+                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+                        group = temp;
+                        
+                        for(int i=0;i<reverseBarcode.length();i++){
+                            reverseBarcode[i] = toupper(reverseBarcode[i]);
+                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
+                        }
+                        
+                        reverseBarcode = reverseOligo(reverseBarcode);
+                        oligosPair newPair(oligo, reverseBarcode);
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        //check for repeat barcodes
+                        string tempPair = oligo+reverseBarcode;
+                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
+                        else { uniqueBarcodes.insert(tempPair); }
+                        
+                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }else {
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = barcodes.find(oligo);
+                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                        
+                        barcodes[oligo]=indexBarcode; indexBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
+                               }
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+                       }
+                       m->gobble(inOligos);
+               }
+               inOligos.close();
+               
+        if (hasPairedBarcodes || hasPrimer) {
+            pairedOligos = true;
+            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
+        }
+        
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+                       barcodes[""] = 0;
+                       barcodeNameVector.push_back("");
+               }
+               
+               if(primerNameVector.size() == 0){
+                       primers[""] = 0;
+                       primerNameVector.push_back("");
+               }
+               
+               fastqFileNames.resize(barcodeNameVector.size());
+               for(int i=0;i<fastqFileNames.size();i++){
+                       fastqFileNames[i].assign(primerNameVector.size(), "");
+               }
+               
+               
+                       set<string> uniqueNames; //used to cleanup outputFileNames
+            if (pairedOligos) {
+                for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+                    for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+                        
+                        string primerName = primerNameVector[itPrimer->first];
+                        string barcodeName = barcodeNameVector[itBar->first];
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastqFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->first];
+                            }
+                            else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->first];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                            variables["[group]"] = comboGroupName;
+                            fastqFileName = getOutputFileName("fastq", variables);
+                            if (uniqueNames.count(fastqFileName) == 0) {
+                                outputNames.push_back(fastqFileName);
+                                outputTypes["fastq"].push_back(fastqFileName);
+                                uniqueNames.insert(fastqFileName);
+                            }
+                            
+                            fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+                            m->openOutputFile(fastqFileName, temp);            temp.close();
+                            
+                        }
+                    }
+                }
+            }else {
+                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                        
+                        string primerName = primerNameVector[itPrimer->second];
+                        string barcodeName = barcodeNameVector[itBar->second];
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastqFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->second];
+                            }
+                            else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->second];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                            variables["[group]"] = comboGroupName;
+                            fastqFileName = getOutputFileName("fastq", variables);
+                            if (uniqueNames.count(fastqFileName) == 0) {
+                                outputNames.push_back(fastqFileName);
+                                outputTypes["fastq"].push_back(fastqFileName);
+                                uniqueNames.insert(fastqFileName);
+                            }
+                            
+                            fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+                            m->openOutputFile(fastqFileName, temp);            temp.close();
+                            
+                        }
+                    }
+                }
+            }
+               
+        ofstream temp;
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        variables["[group]"] = "scrap";
+        noMatchFile = getOutputFileName("fastq", variables);
+        m->openOutputFile(noMatchFile, temp);          temp.close();
+
+               return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "getOligos");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+bool ParseFastaQCommand::readGroup(string groupfile){
+       try {
+        fastqFileNames.clear();
+        
+        groupMap = new GroupMap();
+        groupMap->readMap(groupfile);
+        
+        //like barcodeNameVector - no primer names
+        vector<string> groups = groupMap->getNamesOfGroups();
+               
+               fastqFileNames.resize(groups.size());
+        for (int i = 0; i < fastqFileNames.size(); i++) {
+            for (int j = 0; j < 1; j++) {
+                
+                map<string, string> variables;
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                variables["[group]"] = groups[i];
+                string thisFilename = getOutputFileName("fastq",variables);
+                outputNames.push_back(thisFilename);
+                outputTypes["fastq"].push_back(thisFilename);
+                
+                ofstream temp;
+                m->openOutputFileBinary(thisFilename, temp); temp.close();
+                fastqFileNames[i].push_back(thisFilename);
+                barcodes[groups[i]] = i;
+            }
+        }
+        
+        map<string, string> variables;
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+        variables["[group]"] = "scrap";
+               noMatchFile = getOutputFileName("fastq",variables);
+        m->mothurRemove(noMatchFile);
+               
+               return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "readGroup");
+               exit(1);
+       }
+}
+//********************************************************************/
+string ParseFastaQCommand::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
+               exit(1);
+       }
+}
+
+
+//**********************************************************************************************************************
 
 
 
 
 
 
index 5ae6b9c482fe6b4022bb4d4c5d4ae2e79ea3aad8..2ac141626b4b528d352d2860dc64e65826e22357 100644 (file)
 
 
 #include "command.hpp"
 
 
 #include "command.hpp"
+#include "trimoligos.h"
+#include "sequence.hpp"
+#include "groupmap.h"
+
+struct fastqRead2 {
+    string quality;
+       Sequence seq;
+    string wholeRead;
+       
+       fastqRead2() {  };
+       fastqRead2(Sequence s, string q, string w) : seq(s), quality(q), wholeRead(w){};
+       ~fastqRead2() {};
+};
+
 
 class ParseFastaQCommand : public Command {
 
 
 class ParseFastaQCommand : public Command {
 
@@ -35,11 +49,29 @@ public:
 private:
 
        vector<string> outputNames;     
 private:
 
        vector<string> outputNames;     
-       string outputDir, fastaQFile, format;
-       bool abort, fasta, qual, pacbio;
+       string outputDir, fastaQFile, format, oligosfile, groupfile;
+       bool abort, fasta, qual, pacbio, pairedOligos;
+    int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+    GroupMap* groupMap;
+    
+    //oligos file data structures
+    vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
+       map<string, int> barcodes;
+       map<string, int> primers;
+    map<int, oligosPair> pairedBarcodes;
+    map<int, oligosPair> pairedPrimers;
+    vector<vector<string> > fastqFileNames;
+    string noMatchFile;
        
        vector<int> convertQual(string);
     vector<char> convertTable;
        
        vector<int> convertQual(string);
     vector<char> convertTable;
+    bool readOligos(string oligosFile);
+    bool readGroup(string oligosFile);
+    string reverseOligo(string oligo);
+    fastqRead2 readFastq(ifstream&, bool&);
+    int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+    int findGroup(fastqRead2, int&, int&, string);
+    
 };
 
 #endif
 };
 
 #endif
index bccb75122ffbc9cebc5862f00df0b17d9e644f51..03bcebb591e35b19b10224ff6a4512bdecdf2dbb 100755 (executable)
@@ -17,7 +17,8 @@
 vector<string> SffInfoCommand::setParameters(){        \r
        try {           \r
                CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
 vector<string> SffInfoCommand::setParameters(){        \r
        try {           \r
                CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
-        CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(poligos);\r
+        CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+        CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
                CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
                CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
                CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
                CommandParameter pflow("flow", "Boolean", "", "T", "", "", "","flow",false,false); parameters.push_back(pflow);\r
@@ -46,11 +47,12 @@ string SffInfoCommand::getHelpString(){
        try {\r
                string helpString = "";\r
                helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
        try {\r
                string helpString = "";\r
                helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
-               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
                helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
                helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
                helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
                helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
                helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
                helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
         helpString += "The oligos parameter allows you to provide an oligos file to split your sff file into separate sff files by barcode. \n";\r
+        helpString += "The group parameter allows you to provide a group file to split your sff file into separate sff files by group. \n";\r
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
         helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";\r
                helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";\r
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
@@ -112,7 +114,7 @@ SffInfoCommand::SffInfoCommand(){
 SffInfoCommand::SffInfoCommand(string option)  {\r
        try {\r
                abort = false; calledHelp = false;   \r
 SffInfoCommand::SffInfoCommand(string option)  {\r
        try {\r
                abort = false; calledHelp = false;   \r
-               hasAccnos = false; hasOligos = false;\r
+               hasAccnos = false; hasOligos = false; hasGroup = false;\r
         split = 1;\r
                \r
                //allow user to run help\r
         split = 1;\r
                \r
                //allow user to run help\r
@@ -293,7 +295,7 @@ SffInfoCommand::SffInfoCommand(string option)  {
                                        bool ignore = false;\r
                                        if (oligosFileNames[i] == "current") { \r
                                                oligosFileNames[i] = m->getOligosFile(); \r
                                        bool ignore = false;\r
                                        if (oligosFileNames[i] == "current") { \r
                                                oligosFileNames[i] = m->getOligosFile(); \r
-                                               if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the accnos parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               if (oligosFileNames[i] != "") {  m->mothurOut("Using " + oligosFileNames[i] + " as input file for the oligos parameter where you had given current."); m->mothurOutEndLine(); }\r
                                                else {  \r
                                                        m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
                                                        //erase from file list\r
                                                else {  \r
                                                        m->mothurOut("You have no current oligosfile, ignoring current."); m->mothurOutEndLine(); ignore=true; \r
                                                        //erase from file list\r
@@ -349,12 +351,87 @@ SffInfoCommand::SffInfoCommand(string option)  {
                                //make sure there is at least one valid file left\r
                                if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
                        }\r
                                //make sure there is at least one valid file left\r
                                if (oligosFileNames.size() == 0) { m->mothurOut("no valid oligos files."); m->mothurOutEndLine(); abort = true; }\r
                        }\r
+            \r
+            groupfile = validParameter.validFile(parameters, "group", false);\r
+                       if (groupfile == "not found") { groupfile = "";  }\r
+                       else {\r
+                               hasGroup = true;\r
+                               m->splitAtDash(groupfile, groupFileNames);\r
+                               \r
+                               //go through files and make sure they are good, if not, then disregard them\r
+                               for (int i = 0; i < groupFileNames.size(); i++) {\r
+                                       bool ignore = false;\r
+                                       if (groupFileNames[i] == "current") {\r
+                                               groupFileNames[i] = m->getGroupFile();\r
+                                               if (groupFileNames[i] != "") {  m->mothurOut("Using " + groupFileNames[i] + " as input file for the group parameter where you had given current."); m->mothurOutEndLine(); }\r
+                                               else {\r
+                                                       m->mothurOut("You have no current group file, ignoring current."); m->mothurOutEndLine(); ignore=true;\r
+                                                       //erase from file list\r
+                                                       groupFileNames.erase(groupFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                                       \r
+                                       if (!ignore) {\r
+                        \r
+                                               if (inputDir != "") {\r
+                                                       string path = m->hasPath(groupFileNames[i]);\r
+                                                       //if the user has not given a path then, add inputdir. else leave path alone.\r
+                                                       if (path == "") {       groupFileNames[i] = inputDir + groupFileNames[i];               }\r
+                                               }\r
+                        \r
+                                               ifstream in;\r
+                                               int ableToOpen = m->openInputFile(groupFileNames[i], in, "noerror");\r
+                        \r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getDefaultPath() != "") { //default path is set\r
+                                                               string tryPath = m->getDefaultPath() + m->getSimpleName(groupFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               groupFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               //if you can't open it, try default location\r
+                                               if (ableToOpen == 1) {\r
+                                                       if (m->getOutputDir() != "") { //default path is set\r
+                                                               string tryPath = m->getOutputDir() + m->getSimpleName(groupFileNames[i]);\r
+                                                               m->mothurOut("Unable to open " + groupFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();\r
+                                                               ifstream in2;\r
+                                                               ableToOpen = m->openInputFile(tryPath, in2, "noerror");\r
+                                                               in2.close();\r
+                                                               groupFileNames[i] = tryPath;\r
+                                                       }\r
+                                               }\r
+                                               in.close();\r
+                                               \r
+                                               if (ableToOpen == 1) {\r
+                                                       m->mothurOut("Unable to open " + groupFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();\r
+                                                       //erase from file list\r
+                                                       groupFileNames.erase(groupFileNames.begin()+i);\r
+                                                       i--;\r
+                                               }\r
+                                       }\r
+                               }\r
+                               \r
+                               //make sure there is at least one valid file left\r
+                               if (groupFileNames.size() == 0) { m->mothurOut("no valid group files."); m->mothurOutEndLine(); abort = true; }\r
+                       }\r
 \r
 \r
-                       if (hasOligos) {\r
+                       if (hasGroup) {\r
+                split = 2;\r
+                               if (groupFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+                       }\r
+            \r
+            if (hasOligos) {\r
                 split = 2;\r
                 split = 2;\r
-                               if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
+                               if (oligosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide an oligos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
                        }\r
             \r
                        }\r
             \r
+            if (hasGroup && hasOligos) { m->mothurOut("You must enter ONLY ONE of the following: oligos or group."); m->mothurOutEndLine(); abort = true;}\r
+            \r
                        if (hasAccnos) {\r
                                if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
                        }\r
                        if (hasAccnos) {\r
                                if (accnosFileNames.size() != filenames.size()) { abort = true; m->mothurOut("If you provide a accnos file, you must have one for each sff file."); m->mothurOutEndLine(); }\r
                        }\r
@@ -442,7 +519,8 @@ int SffInfoCommand::execute(){
             \r
             string oligos = "";\r
             if (hasOligos) { oligos = oligosFileNames[s]; }\r
             \r
             string oligos = "";\r
             if (hasOligos) { oligos = oligosFileNames[s]; }\r
-                       \r
+            if (hasGroup) { oligos = groupFileNames[s]; }\r
+            \r
                        int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
 \r
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
                        int numReads = extractSffInfo(filenames[s], accnos, oligos);\r
 \r
                        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to extract " + toString(numReads) + ".");\r
@@ -490,9 +568,10 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                \r
                if (accnos != "")       {  readAccnosFile(accnos);  }\r
                else                            {       seqNames.clear();               }\r
                \r
                if (accnos != "")       {  readAccnosFile(accnos);  }\r
                else                            {       seqNames.clear();               }\r
-         \r
-        if (oligos != "")   {   readOligos(oligos);  split = 2;   }\r
-\r
+        \r
+        if (hasOligos)   {   readOligos(oligos);    split = 2;      }\r
+        if (hasGroup)    {   readGroup(oligos);     split = 2;      }\r
+        \r
                ofstream outSfftxt, outFasta, outQual, outFlow;\r
                string outFastaFileName, outQualFileName;\r
         string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
                ofstream outSfftxt, outFasta, outQual, outFlow;\r
                string outFastaFileName, outQualFileName;\r
         string rootName = outputDir + m->getRootName(m->getSimpleName(input));\r
@@ -526,7 +605,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                //print common header\r
                if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
                if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
                //print common header\r
                if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
                if (flow)       {       outFlow << header.numFlowsPerRead << endl;      }\r
-                       \r
+               \r
                //read through the sff file\r
                while (!in.eof()) {\r
                        \r
                //read through the sff file\r
                while (!in.eof()) {\r
                        \r
@@ -551,7 +630,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                        }\r
                        \r
                        count++;\r
                        }\r
                        \r
                        count++;\r
-        \r
+            \r
                        //report progress\r
                        if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }\r
                \r
                        //report progress\r
                        if((count+1) % 10000 == 0){     m->mothurOut(toString(count+1)); m->mothurOutEndLine();         }\r
                \r
@@ -574,13 +653,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
             //create new common headers for each file with the correct number of reads\r
             adjustCommonHeader(header);\r
             \r
             //create new common headers for each file with the correct number of reads\r
             adjustCommonHeader(header);\r
             \r
-            //close files and delete ofstreams\r
-            for(int i=0;i<filehandles.size();i++){\r
-                               for(int j=0;j<filehandles[0].size();j++){\r
-                    (filehandles[i][j].begin()->second)->close(); delete (filehandles[i][j].begin()->second);\r
-                    (filehandlesHeaders[i][j].begin()->second)->close(); delete (filehandlesHeaders[i][j].begin()->second);\r
-                }\r
-            }\r
+            if (hasGroup) { delete groupMap; }\r
             \r
             //cout << "here" << endl;\r
                        map<string, string>::iterator it;\r
             \r
             //cout << "here" << endl;\r
                        map<string, string>::iterator it;\r
@@ -588,13 +661,13 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                        for(int i=0;i<filehandles.size();i++){\r
                                for(int j=0;j<filehandles[0].size();j++){\r
                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << endl;\r
                        for(int i=0;i<filehandles.size();i++){\r
                                for(int j=0;j<filehandles[0].size();j++){\r
                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << endl;\r
-                                       if (filehandles[i][j].begin()->first != "") {\r
-                                               if (namesToRemove.count(filehandles[i][j].begin()->first) == 0) {\r
-                                                       if(m->isBlank(filehandles[i][j].begin()->first)){\r
+                                       if (filehandles[i][j] != "") {\r
+                                               if (namesToRemove.count(filehandles[i][j]) == 0) {\r
+                                                       if(m->isBlank(filehandles[i][j])){\r
                                 //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
                                 //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " is blank removing" << endl;\r
-                                                               m->mothurRemove(filehandles[i][j].begin()->first);\r
-                                m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
-                                                               namesToRemove.insert(filehandles[i][j].begin()->first);\r
+                                                               m->mothurRemove(filehandles[i][j]);\r
+                                m->mothurRemove(filehandlesHeaders[i][j]);\r
+                                                               namesToRemove.insert(filehandles[i][j]);\r
                             }\r
                                                }\r
                                        }\r
                             }\r
                                                }\r
                                        }\r
@@ -604,11 +677,11 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
             //append new header to reads\r
             for (int i = 0; i < filehandles.size(); i++) {\r
                 for (int j = 0; j < filehandles[i].size(); j++) {\r
             //append new header to reads\r
             for (int i = 0; i < filehandles.size(); i++) {\r
                 for (int j = 0; j < filehandles[i].size(); j++) {\r
-                    m->appendBinaryFiles(filehandles[i][j].begin()->first, filehandlesHeaders[i][j].begin()->first);\r
-                    m->renameFile(filehandlesHeaders[i][j].begin()->first, filehandles[i][j].begin()->first);\r
-                    m->mothurRemove(filehandlesHeaders[i][j].begin()->first);\r
+                    m->appendBinaryFiles(filehandles[i][j], filehandlesHeaders[i][j]);\r
+                    m->renameFile(filehandlesHeaders[i][j], filehandles[i][j]);\r
+                    m->mothurRemove(filehandlesHeaders[i][j]);\r
                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
                     //cout << i << '\t' << '\t' << j  << '\t' << filehandles[i][j] << " done appending headers and removing " << filehandlesHeaders[i][j] << endl;\r
-                    if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j].begin()->first); }\r
+                    if (numSplitReads[i][j] == 0) { m->mothurRemove(filehandles[i][j]); }\r
                 }\r
             }\r
                        //cout << "here3" << endl;\r
                 }\r
             }\r
                        //cout << "here3" << endl;\r
@@ -733,7 +806,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,4);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,4);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -744,7 +820,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,4);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,4);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-               (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -765,7 +844,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         thisbuffer[7] = offset & 0xFF;\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         thisbuffer[7] = offset & 0xFF;\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 8);\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(thisbuffer, 8);\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(thisbuffer, 8);\r
             }\r
         }\r
         outNoMatchHeader.write(thisbuffer, 8);\r
@@ -784,7 +866,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         thisbuffer2[3] = offset & 0xFF;\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         thisbuffer2[3] = offset & 0xFF;\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {\r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer2, 4);\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(thisbuffer2, 4);\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(thisbuffer2, 4);\r
             }\r
         }\r
         outNoMatchHeader.write(thisbuffer2, 4);\r
@@ -809,7 +894,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
                  }\r
                     thisbuffer[2] = (numSplitReads[i][j] >> 16) & 0xFF;\r
                     thisbuffer[3] = (numSplitReads[i][j] >> 24) & 0xFF;\r
                  }\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(thisbuffer, 4);\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(thisbuffer, 4);\r
+                out.close();\r
                 delete[] thisbuffer;\r
             }\r
         }\r
                 delete[] thisbuffer;\r
             }\r
         }\r
@@ -834,7 +922,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -845,7 +936,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -856,7 +950,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,2);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -867,7 +964,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,1);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,1);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -878,7 +978,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,header.numFlowsPerRead);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,header.numFlowsPerRead);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -889,7 +992,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         in.read(mybuffer,header.keyLength);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         in.read(mybuffer,header.keyLength);\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) {  \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, in.gcount());\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, in.gcount());\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, in.gcount());\r
@@ -904,7 +1010,10 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
         mybuffer = new char[spot-spotInFile];\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
         mybuffer = new char[spot-spotInFile];\r
         for (int i = 0; i < filehandlesHeaders.size(); i++) { \r
             for (int j = 0; j < filehandlesHeaders[i].size(); j++) {\r
-                (*(filehandlesHeaders[i][j].begin()->second)).write(mybuffer, spot-spotInFile);\r
+                ofstream out;\r
+                m->openOutputFileBinaryAppend(filehandlesHeaders[i][j], out);\r
+                out.write(mybuffer, spot-spotInFile);\r
+                out.close();\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
             }\r
         }\r
         outNoMatchHeader.write(mybuffer, spot-spotInFile);\r
@@ -1031,9 +1140,12 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads,
             \r
             if (split > 1) { \r
                \r
             \r
             if (split > 1) { \r
                \r
-                int barcodeIndex, primerIndex;\r
-                int trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);\r
-                                \r
+                int barcodeIndex, primerIndex, trashCodeLength;\r
+                \r
+                if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);                }\r
+                else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
+                else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
+\r
                 char * mybuffer;\r
                 mybuffer = new char [spot-startSpotInFile];\r
                 \r
                 char * mybuffer;\r
                 mybuffer = new char [spot-startSpotInFile];\r
                 \r
@@ -1044,7 +1156,10 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads,
                 \r
                 \r
                 if(trashCodeLength == 0){\r
                 \r
                 \r
                 if(trashCodeLength == 0){\r
-                    (*(filehandles[barcodeIndex][primerIndex].begin()->second)).write(mybuffer, in2.gcount());\r
+                    ofstream out;\r
+                    m->openOutputFileBinaryAppend(filehandles[barcodeIndex][primerIndex], out);\r
+                    out.write(mybuffer, in2.gcount());\r
+                    out.close();\r
                     numSplitReads[barcodeIndex][primerIndex]++;\r
                                }\r
                                else{\r
                     numSplitReads[barcodeIndex][primerIndex]++;\r
                                }\r
                                else{\r
@@ -1152,7 +1267,29 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
                m->errorOut(e, "SffInfoCommand", "findGroup");\r
                exit(1);\r
        }\r
                m->errorOut(e, "SffInfoCommand", "findGroup");\r
                exit(1);\r
        }\r
-}     \r
+}\r
+//**********************************************************************************************************************\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, string groupMode) {\r
+       try {\r
+        string trashCode = "";\r
+        primer = 0;\r
+        \r
+        string group = groupMap->getGroup(header.name);\r
+        if (group == "not found") {     trashCode += "g";   } //scrap for group\r
+        else { //find file group\r
+            map<string, int>::iterator it = barcodes.find(group);\r
+            if (it != barcodes.end()) {\r
+                barcode = it->second;\r
+            }else { trashCode += "g"; }\r
+        }\r
+        \r
+        return trashCode.length();\r
+    }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "findGroup");\r
+               exit(1);\r
+       }\r
+}\r
 //**********************************************************************************************************************\r
 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
        try {\r
 //**********************************************************************************************************************\r
 int SffInfoCommand::decodeName(string& timestamp, string& region, string& xy, string name) {\r
        try {\r
@@ -1685,7 +1822,7 @@ bool SffInfoCommand::readOligos(string oligoFile){
                \r
                while(!inOligos.eof()){\r
             \r
                \r
                while(!inOligos.eof()){\r
             \r
-                       inOligos >> type; \r
+                       inOligos >> type;\r
             \r
                        if(type[0] == '#'){\r
                                while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
             \r
                        if(type[0] == '#'){\r
                                while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
@@ -1707,19 +1844,20 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                        group = "";\r
                                        \r
                                        // get rest of line in case there is a primer name\r
                                        group = "";\r
                                        \r
                                        // get rest of line in case there is a primer name\r
-                                       while (!inOligos.eof()) {       \r
-                                               char c = inOligos.get(); \r
+                                       while (!inOligos.eof()) {\r
+                                               char c = inOligos.get();\r
                                                if (c == 10 || c == 13 || c == -1){     break;  }\r
                                                else if (c == 32 || c == 9){;} //space or tab\r
                                                else {  group += c;  }\r
                                                if (c == 10 || c == 13 || c == -1){     break;  }\r
                                                else if (c == 32 || c == 9){;} //space or tab\r
                                                else {  group += c;  }\r
-                                       } \r
+                                       }\r
                                        \r
                                        //check for repeat barcodes\r
                                        map<string, int>::iterator itPrime = primers.find(oligo);\r
                                        if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
                                        \r
                                        \r
                                        //check for repeat barcodes\r
                                        map<string, int>::iterator itPrime = primers.find(oligo);\r
                                        if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
                                        \r
-                                       primers[oligo]=indexPrimer; indexPrimer++;              \r
+                                       primers[oligo]=indexPrimer; indexPrimer++;\r
                                        primerNameVector.push_back(group);\r
                                        primerNameVector.push_back(group);\r
+                   \r
                                }else if(type == "REVERSE"){\r
                                        //Sequence oligoRC("reverse", oligo);\r
                                        //oligoRC.reverseComplement();\r
                                }else if(type == "REVERSE"){\r
                                        //Sequence oligoRC("reverse", oligo);\r
                                        //oligoRC.reverseComplement();\r
@@ -1728,6 +1866,7 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                }\r
                                else if(type == "BARCODE"){\r
                                        inOligos >> group;\r
                                }\r
                                else if(type == "BARCODE"){\r
                                        inOligos >> group;\r
+                    \r
                                        \r
                                        //check for repeat barcodes\r
                                        map<string, int>::iterator itBar = barcodes.find(oligo);\r
                                        \r
                                        //check for repeat barcodes\r
                                        map<string, int>::iterator itBar = barcodes.find(oligo);\r
@@ -1743,31 +1882,27 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
                        }\r
                        m->gobble(inOligos);\r
                                else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
                        }\r
                        m->gobble(inOligos);\r
-               }       \r
+               }\r
                inOligos.close();\r
                inOligos.close();\r
-               \r
+    \r
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }\r
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }\r
-               \r
+    \r
                //add in potential combos\r
                if(barcodeNameVector.size() == 0){\r
                        barcodes[""] = 0;\r
                //add in potential combos\r
                if(barcodeNameVector.size() == 0){\r
                        barcodes[""] = 0;\r
-                       barcodeNameVector.push_back("");                        \r
+                       barcodeNameVector.push_back("");\r
                }\r
                \r
                if(primerNameVector.size() == 0){\r
                        primers[""] = 0;\r
                }\r
                \r
                if(primerNameVector.size() == 0){\r
                        primers[""] = 0;\r
-                       primerNameVector.push_back("");                 \r
+                       primerNameVector.push_back("");\r
                }\r
                \r
                filehandles.resize(barcodeNameVector.size());\r
                }\r
                \r
                filehandles.resize(barcodeNameVector.size());\r
-        for (int i = 0; i < filehandles.size(); i++) {\r
-            for (int j = 0; j < primerNameVector.size(); j++) {\r
-                ofstream* temp;\r
-                map<string, ofstream*> myMap; myMap[""] = temp;\r
-                filehandles[i].push_back(myMap);\r
-            }\r
-        }\r
-                       \r
+               for(int i=0;i<filehandles.size();i++){\r
+                       filehandles[i].assign(primerNameVector.size(), "");\r
+               }\r
+        \r
                if(split > 1){\r
                        set<string> uniqueNames; //used to cleanup outputFileNames\r
                        for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
                if(split > 1){\r
                        set<string> uniqueNames; //used to cleanup outputFileNames\r
                        for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
@@ -1793,8 +1928,8 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                                }\r
                                        }\r
                                        \r
                                                }\r
                                        }\r
                                        \r
-                                       ofstream* temp = new ofstream;\r
-                    map<string, string> variables; \r
+                                       ofstream temp;\r
+                    map<string, string> variables;\r
                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
                     variables["[group]"] = comboGroupName;\r
                                        string thisFilename = getOutputFileName("sff",variables);\r
                     variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
                     variables["[group]"] = comboGroupName;\r
                                        string thisFilename = getOutputFileName("sff",variables);\r
@@ -1804,24 +1939,22 @@ bool SffInfoCommand::readOligos(string oligoFile){
                                                uniqueNames.insert(thisFilename);\r
                                        }\r
                                        \r
                                                uniqueNames.insert(thisFilename);\r
                                        }\r
                                        \r
-                    map<string, ofstream*> myMap; myMap[thisFilename] = temp;\r
-                    m->openOutputFileBinary(thisFilename, *(temp));\r
-                                       filehandles[itBar->second][itPrimer->second] = myMap;\r
-                    map<string, ofstream*>::iterator itOfstream = filehandles[itBar->second][itPrimer->second].find("");\r
-                    if (itOfstream != filehandles[itBar->second][itPrimer->second].end()) { filehandles[itBar->second][itPrimer->second].erase(itOfstream); } //remove blank entry so we dont mess with .begin() above. code above assumes only 1 file name in the map\r
+                                       filehandles[itBar->second][itPrimer->second] = thisFilename;\r
+                                       temp.open(thisFilename.c_str(), ios::binary);           temp.close();\r
                                }\r
                        }\r
                }\r
                numFPrimers = primers.size();\r
         numLinkers = linker.size();\r
         numSpacers = spacer.size();\r
                                }\r
                        }\r
                }\r
                numFPrimers = primers.size();\r
         numLinkers = linker.size();\r
         numSpacers = spacer.size();\r
-        map<string, string> variables; \r
+        map<string, string> variables;\r
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
         variables["[group]"] = "scrap";\r
                noMatchFile = getOutputFileName("sff",variables);\r
         m->mothurRemove(noMatchFile);\r
         numNoMatch = 0;\r
         \r
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
         variables["[group]"] = "scrap";\r
                noMatchFile = getOutputFileName("sff",variables);\r
         m->mothurRemove(noMatchFile);\r
         numNoMatch = 0;\r
         \r
+        \r
                bool allBlank = true;\r
                for (int i = 0; i < barcodeNameVector.size(); i++) {\r
                        if (barcodeNameVector[i] != "") {\r
                bool allBlank = true;\r
                for (int i = 0; i < barcodeNameVector.size(); i++) {\r
                        if (barcodeNameVector[i] != "") {\r
@@ -1838,15 +1971,10 @@ bool SffInfoCommand::readOligos(string oligoFile){
                \r
         filehandlesHeaders.resize(filehandles.size());\r
         numSplitReads.resize(filehandles.size());\r
                \r
         filehandlesHeaders.resize(filehandles.size());\r
         numSplitReads.resize(filehandles.size());\r
-        for (int i = 0; i < filehandles.size(); i++) { \r
-            numSplitReads[i].resize(filehandles[i].size(), 0); \r
+        for (int i = 0; i < filehandles.size(); i++) {\r
+            numSplitReads[i].resize(filehandles[i].size(), 0);\r
             for (int j = 0; j < filehandles[i].size(); j++) {\r
             for (int j = 0; j < filehandles[i].size(); j++) {\r
-                ofstream* temp = new ofstream;\r
-                map<string, ofstream* > myMap;\r
-                string thisHeader = (filehandles[i][j].begin())->first+"headers";\r
-                myMap[thisHeader] = temp;\r
-                m->openOutputFileBinary(thisHeader, *(temp));\r
-                filehandlesHeaders[i].push_back(myMap);\r
+                filehandlesHeaders[i].push_back(filehandles[i][j]+"headers");\r
             }\r
         }\r
         \r
             }\r
         }\r
         \r
@@ -1864,6 +1992,68 @@ bool SffInfoCommand::readOligos(string oligoFile){
                exit(1);\r
        }\r
 }\r
                exit(1);\r
        }\r
 }\r
+//***************************************************************************************************************\r
+\r
+bool SffInfoCommand::readGroup(string oligoFile){\r
+       try {\r
+        filehandles.clear();\r
+        numSplitReads.clear();\r
+        filehandlesHeaders.clear();\r
+        barcodes.clear();\r
+        \r
+        groupMap = new GroupMap();\r
+        groupMap->readMap(oligoFile);\r
+    \r
+        //like barcodeNameVector - no primer names\r
+        vector<string> groups = groupMap->getNamesOfGroups();\r
+               \r
+               filehandles.resize(groups.size());\r
+        for (int i = 0; i < filehandles.size(); i++) {\r
+            for (int j = 0; j < 1; j++) {\r
+                \r
+                map<string, string> variables;\r
+                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+                variables["[group]"] = groups[i];\r
+                string thisFilename = getOutputFileName("sff",variables);\r
+                outputNames.push_back(thisFilename);\r
+                outputTypes["sff"].push_back(thisFilename);\r
+               \r
+                ofstream temp;\r
+                m->openOutputFileBinary(thisFilename, temp); temp.close();\r
+                filehandles[i].push_back(thisFilename);\r
+                barcodes[groups[i]] = i;\r
+            }\r
+        }\r
+        \r
+        map<string, string> variables;\r
+        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
+        variables["[group]"] = "scrap";\r
+               noMatchFile = getOutputFileName("sff",variables);\r
+        m->mothurRemove(noMatchFile);\r
+        numNoMatch = 0;\r
+        \r
+               \r
+        filehandlesHeaders.resize(groups.size());\r
+        numSplitReads.resize(filehandles.size());\r
+        for (int i = 0; i < filehandles.size(); i++) {\r
+            numSplitReads[i].resize(filehandles[i].size(), 0);\r
+            for (int j = 0; j < filehandles[i].size(); j++) {\r
+                ofstream temp ;\r
+                string thisHeader = filehandles[i][j]+"headers";\r
+                m->openOutputFileBinary(thisHeader, temp); temp.close();\r
+                filehandlesHeaders[i].push_back(thisHeader);\r
+            }\r
+        }\r
+               \r
+               return true;\r
+               \r
+       }\r
+       catch(exception& e) {\r
+               m->errorOut(e, "SffInfoCommand", "readGroup");\r
+               exit(1);\r
+       }\r
+}\r
+\r
 //********************************************************************/\r
 string SffInfoCommand::reverseOligo(string oligo){\r
        try {\r
 //********************************************************************/\r
 string SffInfoCommand::reverseOligo(string oligo){\r
        try {\r
index 541c8bad0b262af72b7637629806643bf6991b78..1909e2d43e6d83b3d48ef06922b2d369a0e104d5 100644 (file)
@@ -11,7 +11,7 @@
  */
 
 #include "command.hpp"
  */
 
 #include "command.hpp"
-
+#include "groupmap.h"
 
 /**********************************************************/
 struct CommonHeader {
 
 /**********************************************************/
 struct CommonHeader {
@@ -79,17 +79,18 @@ public:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
        void help() { m->mothurOut(getHelpString()); }  
        
 private:
-       string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile;
-       vector<string> filenames, outputNames, accnosFileNames, oligosFileNames;
-       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos;
+       string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
+       vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
+       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
        int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
        set<string> seqNames;
     map<string, int> barcodes;
     map<string, int> primers;
        int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
        set<string> seqNames;
     map<string, int> barcodes;
     map<string, int> primers;
+    GroupMap* groupMap;
     vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
     vector<vector<int> > numSplitReads;
     vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
     vector<vector<int> > numSplitReads;
-    vector<vector<map<string, ofstream*> > > filehandles;
-    vector<vector<map<string, ofstream*> > > filehandlesHeaders;
+    vector<vector<string> > filehandles;
+    vector<vector<string> > filehandlesHeaders;
     
        //extract sff file functions
        int extractSffInfo(string, string, string);
     
        //extract sff file functions
        int extractSffInfo(string, string, string);
@@ -98,6 +99,7 @@ private:
        bool readSeqData(ifstream&, seqRead&, int, Header&);
        int decodeName(string&, string&, string&, string);
     bool readOligos(string oligosFile);
        bool readSeqData(ifstream&, seqRead&, int, Header&);
        int decodeName(string&, string&, string&, string);
     bool readOligos(string oligosFile);
+    bool readGroup(string oligosFile);
        
        int printCommonHeader(ofstream&, CommonHeader&); 
        int printHeader(ofstream&, Header&);
        
        int printCommonHeader(ofstream&, CommonHeader&); 
        int printHeader(ofstream&, Header&);
@@ -110,6 +112,7 @@ private:
        bool sanityCheck(Header&, seqRead&);
     int adjustCommonHeader(CommonHeader);
     int findGroup(Header header, seqRead read, int& barcode, int& primer);
        bool sanityCheck(Header&, seqRead&);
     int adjustCommonHeader(CommonHeader);
     int findGroup(Header header, seqRead read, int& barcode, int& primer);
+    int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
     string reverseOligo(string oligo);
     
        //parsesfftxt file functions
     string reverseOligo(string oligo);
     
        //parsesfftxt file functions
index c291ddda5369fcb1a87a3420867934e55acc61aa..65ccda469c006305c7357310c7f344f0d2d4c326 100644 (file)
@@ -7,14 +7,25 @@
 //
 
 #include "sracommand.h"
 //
 
 #include "sracommand.h"
+#include "sffinfocommand.h"
+#include "parsefastaqcommand.h"
 
 //**********************************************************************************************************************
 vector<string> SRACommand::setParameters(){
        try {
 
 //**********************************************************************************************************************
 vector<string> SRACommand::setParameters(){
        try {
-        CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","sra",false,false); parameters.push_back(psff);
-               CommandParameter pfastqfile("fastqfile", "InputTypes", "", "", "none", "none", "none","sra",false,false); parameters.push_back(pfastqfile);
+        CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(psff);
+        CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(pgroup);
+        CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(poligos);
+        CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfile);
+               CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfastq);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform);
+        CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
+               CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
+        CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
+               CommandParameter psdiffs("sdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(psdiffs);
+        CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
+        
          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
          //every command must have inputdir and outputdir.  This allows mothur users to redirect input and output files.
                CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
                CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
@@ -35,6 +46,12 @@ string SRACommand::getHelpString(){
                helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
                helpString += "The sra command parameters are: sff, fastqfiles, oligos, platform....\n";
                helpString += "The sffiles parameter is used to provide a file containing a \n";
                helpString += "The sra command creates a sequence read archive from sff or fastq files.\n";
                helpString += "The sra command parameters are: sff, fastqfiles, oligos, platform....\n";
                helpString += "The sffiles parameter is used to provide a file containing a \n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+
                helpString += "The new command should be in the following format: \n";
                helpString += "new(...)\n";
                return helpString;
                helpString += "The new command should be in the following format: \n";
                helpString += "new(...)\n";
                return helpString;
@@ -102,34 +119,95 @@ SRACommand::SRACommand(string option)  {
                        else {
             
                 string path;
                        else {
             
                 string path;
-                               it = parameters.find("sfffiles");
+                               it = parameters.find("sff");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["sfffiles"] = inputDir + it->second;         }
+                                       if (path == "") {       parameters["sff"] = inputDir + it->second;              }
                                }
                                
                                }
                                
-                               it = parameters.find("fastqfiles");
+                               it = parameters.find("fastq");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["fastq"] = inputDir + it->second;            }
+                               }
+                
+                it = parameters.find("file");
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                //user has given a template file
                                if(it != parameters.end()){
                                        path = m->hasPath(it->second);
                                        //if the user has not given a path then, add inputdir. else leave path alone.
-                                       if (path == "") {       parameters["fastqfiles"] = inputDir + it->second;               }
+                                       if (path == "") {       parameters["file"] = inputDir + it->second;             }
+                               }
+                
+                it = parameters.find("group");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["group"] = inputDir + it->second;            }
+                               }
+                
+                it = parameters.find("oligos");
+                               //user has given a template file
+                               if(it != parameters.end()){
+                                       path = m->hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["oligos"] = inputDir + it->second;           }
                                }
             }
             
                        //check for parameters
                                }
             }
             
                        //check for parameters
-            fastqfiles = validParameter.validFile(parameters, "fastqfiles", true);
-                       if (fastqfiles == "not open") { fastqfiles = "";  abort = true; }
-                       else if (fastqfiles == "not found") { fastqfiles = ""; }
+            fastqfile = validParameter.validFile(parameters, "fastq", true);
+                       if (fastqfile == "not open") { fastqfile = "";  abort = true; }
+                       else if (fastqfile == "not found") { fastqfile = ""; }
                        
                        
-                       sfffiles = validParameter.validFile(parameters, "sfffiles", true);
-                       if (sfffiles == "not open") {  sfffiles = "";  abort = true; }
-                       else if (sfffiles == "not found") { sfffiles = ""; }
+                       sfffile = validParameter.validFile(parameters, "sff", true);
+                       if (sfffile == "not open") {  sfffile = "";  abort = true; }
+                       else if (sfffile == "not found") { sfffile = ""; }
+            
+            file = validParameter.validFile(parameters, "file", true);
+                       if (file == "not open") {  file = "";  abort = true; }
+                       else if (file == "not found") { file = ""; }
+            
+            groupfile = validParameter.validFile(parameters, "group", true);
+                       if (groupfile == "not open") {  groupfile = "";  abort = true; }
+                       else if (groupfile == "not found") { groupfile = ""; }
+            else {  m->setGroupFile(groupfile); }
+            
+            oligosfile = validParameter.validFile(parameters, "oligos", true);
+                       if (oligosfile == "not found")      {   oligosfile = "";        }
+                       else if(oligosfile == "not open")       {       abort = true;           }
+                       else {  m->setOligosFile(oligosfile); }
+            
+            
+            file = validParameter.validFile(parameters, "file", true);
+                       if (file == "not open") {  file = "";  abort = true; }
+                       else if (file == "not found") { file = ""; }
                        
                        
-                       if ((fastqfiles == "") && (sfffiles == "")) {
-                m->mothurOut("No valid current files. You must provide a sfffiles or fastqfiles file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+                       if ((fastqfile == "") && (sfffile == "") && (sfffile == "")) {
+                m->mothurOut("[ERROR]: You must provide a file, sff file or fastq file before you can use the sra command."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((groupfile != "") && (oligosfile != "")) {
+                m->mothurOut("[ERROR]: You may not use a group file and an oligos file, only one."); m->mothurOutEndLine(); abort = true;
+            }
+            
+            if ((fastqfile != "") || (sfffile != "")) {
+                if ((groupfile == "") && (oligosfile == "")) {
+                    oligosfile = m->getOligosFile();
+                                       if (oligosfile != "") {  m->mothurOut("Using " + oligosfile + " as input file for the oligos parameter."); m->mothurOutEndLine(); }
+                                       else {
+                                               groupfile = m->getGroupFile();
+                        if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+                        else {
+                            m->mothurOut("[ERROR]: You must provide groupfile or oligos file if splitting a fastq or sff file."); m->mothurOutEndLine(); abort = true;
+                        }
+                                       }
+                }
             }
                                    
             //use only one Mutliple type
             }
                                    
             //use only one Mutliple type
@@ -139,6 +217,25 @@ SRACommand::SRACommand(string option)  {
                        if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
                        else { m->mothurOut("Not a valid platform option.  Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
             
                        if ((platform == "454") || (platform == "????") || (platform == "????") || (platform == "????")) { }
                        else { m->mothurOut("Not a valid platform option.  Valid platform options are 454, ...."); m->mothurOutEndLine(); abort = true; }
             
+            
+            string temp = validParameter.validFile(parameters, "bdiffs", false);               if (temp == "not found"){       temp = "0";             }
+                       m->mothurConvert(temp, bdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "pdiffs", false);           if (temp == "not found"){       temp = "0";             }
+                       m->mothurConvert(temp, pdiffs);
+                       
+            temp = validParameter.validFile(parameters, "ldiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, ldiffs);
+            
+            temp = validParameter.validFile(parameters, "sdiffs", false);              if (temp == "not found") { temp = "0"; }
+                       m->mothurConvert(temp, sdiffs);
+                       
+                       temp = validParameter.validFile(parameters, "tdiffs", false);           if (temp == "not found") { int tempTotal = pdiffs + bdiffs + ldiffs + sdiffs;  temp = toString(tempTotal); }
+                       m->mothurConvert(temp, tdiffs);
+                       
+                       if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+            
+
                                
                }
                
                                
                }
                
@@ -149,13 +246,20 @@ SRACommand::SRACommand(string option)  {
        }
 }
 //**********************************************************************************************************************
        }
 }
 //**********************************************************************************************************************
-
 int SRACommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
 int SRACommand::execute(){
        try {
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
-
+        //parse files
+        vector<string> filesBySample;
+        
+        if (file != "")             {       readFile(filesBySample);        }
+        else if (sfffile != "")     {       parseSffFile(filesBySample);    }
+        else if (fastqfile != "")   {       parseFastqFile(filesBySample);  }
+        
+        
+        
                
         //output files created by command
                m->mothurOutEndLine();
                
         //output files created by command
                m->mothurOutEndLine();
@@ -171,5 +275,96 @@ int SRACommand::execute(){
        }
 }
 //**********************************************************************************************************************
        }
 }
 //**********************************************************************************************************************
+int SRACommand::readFile(vector<string>& files){
+       try {
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "readFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int SRACommand::parseSffFile(vector<string>& files){
+       try {
+        //run sffinfo to parse sff file into individual sampled sff files
+        string commandString = "sff=" + sfffile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: sffinfo(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+        
+        Command* sffinfoCommand = new SffInfoCommand(commandString);
+        sffinfoCommand->execute();
+        
+        map<string, vector<string> > filenames = sffinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("sff");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in sffinfo
+        
+        delete sffinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "readFile");
+               exit(1);
+       }
+}
+
+//**********************************************************************************************************************
+int SRACommand::parseFastqFile(vector<string>& files){
+       try {
+        
+        //run sffinfo to parse sff file into individual sampled sff files
+        string commandString = "fastq=" + fastqfile;
+        if (groupfile != "") { commandString += ", group=" + groupfile; }
+        else if (oligosfile != "") {
+            commandString += ", oligos=" + oligosfile;
+            //add in pdiffs, bdiffs, ldiffs, sdiffs, tdiffs
+            if (pdiffs != 0) { commandString += ", pdiffs=" + toString(pdiffs); }
+            if (bdiffs != 0) { commandString += ", bdiffs=" + toString(bdiffs); }
+            if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
+            if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
+            if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        }
+        m->mothurOutEndLine();
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        m->mothurOut("Running command: fastq.info(" + commandString + ")"); m->mothurOutEndLine();
+        m->mothurCalling = true;
+        
+        Command* fastqinfoCommand = new ParseFastaQCommand(commandString);
+        fastqinfoCommand->execute();
+        
+        map<string, vector<string> > filenames = fastqinfoCommand->getOutputFiles();
+        map<string, vector<string> >::iterator it = filenames.find("fastq");
+        if (it != filenames.end()) { files = it->second; }
+        else { m->control_pressed = true; } // error in sffinfo
+        
+        delete fastqinfoCommand;
+        m->mothurCalling = false;
+        m->mothurOut("/******************************************/"); m->mothurOutEndLine();
+        
+        return 0;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "SRACommand", "readFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 
 
 
 
index 0f0a945d6118ad7ed17c4f08c7e2dce1dfa45bb0..7bf764b640534d11b5d40242d210bc77064a00de 100644 (file)
@@ -35,8 +35,14 @@ public:
     
 private:
     bool abort;
     
 private:
     bool abort;
-    string sfffiles, fastqfiles, platform, outputDir;
+    int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
+    string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile;
     vector<string> outputNames;
     vector<string> outputNames;
+    
+    int readFile(vector<string>&);
+    int parseSffFile(vector<string>&);
+    int parseFastqFile(vector<string>&);
+    
 };
 
 /**************************************************************************************************/
 };
 
 /**************************************************************************************************/