]> git.donarmstrong.com Git - mothur.git/commitdiff
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
authorSarah Westcott <mothur.westcott@gmail.com>
Wed, 9 Apr 2014 13:13:12 +0000 (09:13 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Wed, 9 Apr 2014 13:13:12 +0000 (09:13 -0400)
27 files changed:
Mothur.xcodeproj/project.pbxproj
aligncommand.cpp
getmimarkspackagecommand.cpp
getmimarkspackagecommand.h
makecontigscommand.cpp
makecontigscommand.h
mothur.h
mothurout.cpp
mothurout.h
oligos.cpp [new file with mode: 0644]
oligos.h [new file with mode: 0644]
parsefastaqcommand.cpp
parsefastaqcommand.h
pcrseqscommand.h
prcseqscommand.cpp
qualityscores.cpp
qualityscores.h
sffinfocommand.cpp
sffinfocommand.h
sracommand.cpp
sracommand.h
trimflowscommand.cpp
trimflowscommand.h
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
trimseqscommand.h

index 0448c02cbbc5d21fc8c353c1cdb60bb9c7ad08c8..57b6624d0e7ecf0f95f4a74d74fb58b93577435b 100644 (file)
@@ -10,6 +10,7 @@
                219C1DE01552C4BD004209F9 /* newcommandtemplate.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DDF1552C4BD004209F9 /* newcommandtemplate.cpp */; };
                219C1DE41559BCCF004209F9 /* getcoremicrobiomecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 219C1DE31559BCCD004209F9 /* getcoremicrobiomecommand.cpp */; };
                483C952E188F0CAD0035E7B7 /* sharedrjsd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */; };
+               4893DE2918EEF28100C615DF /* oligos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 4893DE2818EEF28100C615DF /* oligos.cpp */; };
                48A85BAD18E1AF2000199B6F /* getmimarkspackagecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */; };
                48E981CF189C38FB0042BE9D /* mergesfffilecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 48E981CE189C38FB0042BE9D /* mergesfffilecommand.cpp */; };
                7E6BE10A12F710D8007ADDBE /* refchimeratest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 7E6BE10912F710D8007ADDBE /* refchimeratest.cpp */; };
                219C1DE51559BCF2004209F9 /* getcoremicrobiomecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getcoremicrobiomecommand.h; sourceTree = "<group>"; };
                483C952C188F0C960035E7B7 /* sharedrjsd.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sharedrjsd.h; sourceTree = "<group>"; };
                483C952D188F0CAD0035E7B7 /* sharedrjsd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sharedrjsd.cpp; sourceTree = "<group>"; };
+               4893DE2718EEF27300C615DF /* oligos.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = oligos.h; sourceTree = "<group>"; };
+               4893DE2818EEF28100C615DF /* oligos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = oligos.cpp; sourceTree = "<group>"; };
                48A85BAB18E1AF0600199B6F /* getmimarkspackagecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = getmimarkspackagecommand.h; sourceTree = "<group>"; };
                48A85BAC18E1AF2000199B6F /* getmimarkspackagecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getmimarkspackagecommand.cpp; sourceTree = "<group>"; };
                48E981CD189C38E60042BE9D /* mergesfffilecommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = mergesfffilecommand.h; sourceTree = "<group>"; };
                                A7E9B74012D37EC400DA6239 /* listvector.hpp */,
                                A7E9B75F12D37EC400DA6239 /* nameassignment.cpp */,
                                A7E9B76012D37EC400DA6239 /* nameassignment.hpp */,
+                               4893DE2718EEF27300C615DF /* oligos.h */,
+                               4893DE2818EEF28100C615DF /* oligos.cpp */,
                                A7E9B77712D37EC400DA6239 /* ordervector.cpp */,
                                A7E9B77812D37EC400DA6239 /* ordervector.hpp */,
                                A7E9B79F12D37EC400DA6239 /* qualityscores.cpp */,
                                A7E9B8DC12D37EC400DA6239 /* gotohoverlap.cpp in Sources */,
                                A7E9B8DD12D37EC400DA6239 /* gower.cpp in Sources */,
                                A7E9B8DE12D37EC400DA6239 /* groupmap.cpp in Sources */,
+                               4893DE2918EEF28100C615DF /* oligos.cpp in Sources */,
                                A7E9B8DF12D37EC400DA6239 /* hamming.cpp in Sources */,
                                A7E9B8E012D37EC400DA6239 /* hcluster.cpp in Sources */,
                                A7E9B8E112D37EC400DA6239 /* hclustercommand.cpp in Sources */,
index 3222f8c2d72a32bef9898eb71dd9d270056de0c3..bcc59675d3b545f68197a3f56911a769ebe249bb 100644 (file)
@@ -110,7 +110,7 @@ AlignCommand::AlignCommand(){
 //**********************************************************************************************************************
 AlignCommand::AlignCommand(string option)  {
        try {
-               abort = false; calledHelp = false;  
+               abort = false; calledHelp = false;
                ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
@@ -860,6 +860,8 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
        try {
                int num = 0;
                processIDS.resize(0);
+        bool recalc = false;
+        
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
@@ -882,12 +884,48 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                                
                                exit(0);
                        }else { 
-                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                               exit(0);
+                               m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
+                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                recalc = true;
+                               break;
                        }
                }
                
+        if (recalc) {
+            for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+            vector<unsigned long long> positions;
+                       positions = m->divideFile(filename, processors);
+                       for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
+            
+            num = 0;
+            processIDS.resize(0);
+            process = 1;
+            
+            while (process != processors) {
+                pid_t pid = fork();
+                
+                if (pid > 0) {
+                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                    process++;
+                }else if (pid == 0){
+                    num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename);
+                    
+                    //pass numSeqs to parent
+                    ofstream out;
+                    string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp";
+                    m->openOutputFile(tempFile, out);
+                    out << num << endl;
+                    out.close();
+                    
+                    exit(0);
+                }else {
+                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                    exit(0);
+                }
+            }
+        }
+        
                //do my part
                num = driver(lines[0], alignFileName, reportFileName, accnosFName, filename);
                
index 955cb794a2d5a579abd9da5358195713654ed9cb..67194cea33f77f8853e3668aa5ed39284ca0db26 100644 (file)
@@ -9,6 +9,7 @@
 #include "getmimarkspackagecommand.h"
 #include "groupmap.h"
 
+
 //**********************************************************************************************************************
 vector<string> GetMIMarksPackageCommand::setParameters(){
        try {
@@ -106,7 +107,7 @@ GetMIMarksPackageCommand::GetMIMarksPackageCommand(string option)  {
                        outputTypes["tsv"] = tempOutNames;
             
                        //if the user changes the input directory command factory will send this info to us in the output parameter
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
                 
@@ -192,12 +193,10 @@ int GetMIMarksPackageCommand::execute(){
                
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
         
-        if (oligosfile != "") { readOligos();   }
+        if (oligosfile != "") { Oligos oligos(oligosfile); Groups = oligos.getGroupNames();  }
         else if (file != "")  { readFile();     }
         else {  GroupMap groupmap(groupfile); groupmap.readMap(); Groups = groupmap.getNamesOfGroups(); }
         
-        for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
-        
         if (outputDir == "") { outputDir += m->hasPath(inputfile); }
         map<string, string> variables;
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputfile));
@@ -332,155 +331,7 @@ int GetMIMarksPackageCommand::execute(){
        }
 }
 //***************************************************************************************************************
-int GetMIMarksPackageCommand::readOligos(){
-       try {
-               ifstream inOligos;
-               m->openInputFile(oligosfile, inOligos);
-               
-               string type, oligo, roligo, group;
-        vector<string> primerNameVector, barcodeNameVector;
-        set<string> uniquePrimers;
-        set<string> uniqueBarcodes;
-               
-               while(!inOligos.eof()){
-            
-                       inOligos >> type;
-            
-                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-            
-                       if(type[0] == '#'){
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-                               m->gobble(inOligos);
-                       }
-                       else{
-                               m->gobble(inOligos);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               inOligos >> oligo;
-                
-                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               
-                               if(type == "FORWARD"){
-                                       group = "";
-                                       
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                                       
-                                       primerNameVector.push_back(group);
-                               }
-                else if (type == "PRIMER"){
-                    m->gobble(inOligos);
-                                       
-                    inOligos >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    
-                    group = "";
-                    
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                    
-                                       primerNameVector.push_back(group);
-                }else if(type == "BARCODE"){
-                                       inOligos >> group;
-                    
-                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
-                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
-                    
-                    string temp = "";
-                    while (!inOligos.eof())    {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  temp += c;  }
-                                       }
-                                       
-                    //then this is illumina data with 4 columns
-                    if (temp != "") {
-                        
-                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
-                        group = temp;
-                        
-                        barcodeNameVector.push_back(group);
-                    }else {
-                        barcodeNameVector.push_back(group);
-                    }
-                               }
-                       }
-                       m->gobble(inOligos);
-               }
-               inOligos.close();
-        
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-                       barcodeNameVector.push_back("");
-               }
-               
-               if(primerNameVector.size() == 0){
-                       primerNameVector.push_back("");
-               }
-        
-        
-        for(int i = 0; i <  barcodeNameVector.size(); i++){
-            for(int j = 0; j < primerNameVector.size(); j++){
-                
-                string primerName = primerNameVector[j];
-                string barcodeName = barcodeNameVector[i];
-                
-                if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
-                else if ((primerName == "") && (barcodeName == "")) { }
-                else {
-                    string comboGroupName = "";
-                    
-                    if(primerName == ""){
-                        comboGroupName = barcodeNameVector[i];
-                    }
-                    else{
-                        if(barcodeName == ""){
-                            comboGroupName = primerNameVector[j];
-                        }
-                        else{
-                            comboGroupName = barcodeNameVector[i] + "." + primerNameVector[j];
-                        }
-                    }
-                    uniqueNames.insert(comboGroupName);
-                }
-            }
-        }
-        
-        
-        
-        if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
-        
-        
-               return true;
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "GetMIMarksPackageCommand", "readOligos");
-               exit(1);
-       }
-}
-//**********************************************************************************************************************
+
 // going to have to rework this to allow for other options --
 /*
  file option 1
@@ -506,7 +357,7 @@ int GetMIMarksPackageCommand::readOligos(){
 
 int GetMIMarksPackageCommand::readFile(){
        try {
-        //vector<string> theseFiles;
+        Oligos oligos;
         inputfile = file;
         
         ifstream in;
@@ -534,6 +385,14 @@ int GetMIMarksPackageCommand::readFile(){
             
             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
             
+            if (inputDir != "") {
+                string path = m->hasPath(thisFileName2);
+                if (path == "") {  thisFileName2 = inputDir + thisFileName2;  }
+                
+                path = m->hasPath(thisFileName1);
+                if (path == "") {  thisFileName1 = inputDir + thisFileName1;  }
+            }
+            
             //check to make sure both are able to be opened
             ifstream in2;
             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
@@ -603,13 +462,15 @@ int GetMIMarksPackageCommand::readFile(){
             if ((pieces.size() == 2) && (openForward != 1) && (openReverse != 1)) { //good pair and sff or fastq and oligos
                     oligosfile = thisFileName2;
                     if (m->debug) { m->mothurOut("[DEBUG]: about to read oligos\n"); }
-                    readOligos();
+                    oligos.read(oligosfile);
             }else if((pieces.size() == 3) && (openForward != 1) && (openReverse != 1)) { //good pair and paired read
                 Groups.push_back(group);
             }
         }
         in.close();
         
+        Groups = oligos.getGroupNames();
+        
         inputfile = file;
         
         return 0;
index 8bd2e4973c624eaa83d735c349a833181a39daea..65219d7758b0da0e1f795fb70f957bca5befad64 100644 (file)
@@ -10,6 +10,7 @@
 #define Mothur_getmimarkspackagecommand_h
 
 #include "command.hpp"
+#include "oligos.h"
 
 /**************************************************************************************************/
 
@@ -33,12 +34,10 @@ public:
     
 private:
     bool abort, requiredonly;
-    string oligosfile, groupfile, package, inputfile, file;
+    string oligosfile, groupfile, package, inputfile, file, inputDir;
     string outputDir;
     vector<string> outputNames, Groups;
-    set<string> uniqueNames;
     
-    int readOligos();
     int readFile();
 };
 
index 15f002bffdbdc96e65ba18522955d500aa0fa8b0..0d563fe846d2179ea7ca9565e05122847747d468 100644 (file)
@@ -24,7 +24,7 @@ vector<string> MakeContigsCommand::setParameters(){
                CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false,true); parameters.push_back(pbdiffs);
         CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
-
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
         CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "","",false,false); parameters.push_back(palign);
         CommandParameter pallfiles("allfiles", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pallfiles);
         CommandParameter ptrimoverlap("trimoverlap", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ptrimoverlap);
@@ -71,6 +71,7 @@ string MakeContigsCommand::getHelpString(){
                //helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
                helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
                helpString += "The mistmatch parameter allows you to specify the penalty for having different bases.  The default is -1.0.\n";
+        helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
         helpString += "The deltaq parameter allows you to specify the delta allowed between quality scores of a mismatched base.  For example in the overlap, if deltaq=5 and in the alignment seqA, pos 200 has a quality score of 30 and the same position in seqB has a quality score of 20, you take the base from seqA (30-20 >= 5).  If the quality score in seqB is 28 then the base in the consensus will be an N (30-28<5) The default is 6.\n";
                helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
                helpString += "The gapextend parameter allows you to specify the penalty for extending a gap in an alignment.  The default is -1.0.\n";
@@ -153,7 +154,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        
             
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);              
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);             
                        if (inputDir == "not found"){   inputDir = "";          }
                        else { 
                                string path;
@@ -378,6 +379,9 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                                abort=true;
                        }
             
+            temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
+                       reorient = m->isTrue(temp);
+            
             //fill convert table - goes from solexa to sanger. Used fq_all2std.pl as a reference.
             for (int i = -64; i < 65; i++) { 
                 char temp = (char) ((int)(33 + 10*log(1+pow(10,(i/10.0)))/log(10)+0.499));
@@ -440,14 +444,17 @@ int MakeContigsCommand::execute(){
             groupCounts.clear();
             groupMap.clear();
             vector<vector<string> > fastaFileNames;
+            map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
             createOligosGroup = false;
+            oligos = new Oligos();
+            numBarcodes = 0; numFPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
             string outputGroupFileName;
             map<string, string> variables; 
             string thisOutputDir = outputDir;
             if (outputDir == "") {  thisOutputDir = m->hasPath(filesToProcess[l][0][0]); }
             variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(filesToProcess[l][0][0]));
             variables["[tag]"] = "";
-            if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"]);  }
+            if(oligosfile != ""){  createOligosGroup = getOligos(fastaFileNames, variables["[filename]"], uniqueFastaNames);  }
             if (createOligosGroup || createFileGroup) {
                 outputGroupFileName = getOutputFileName("group",variables);
             }
@@ -468,10 +475,10 @@ int MakeContigsCommand::execute(){
             //remove temp fasta and qual files
             for (int i = 0; i < processors; i++) { for(int j = 0; j < filesToProcess[l][i].size(); j++) { m->mothurRemove(filesToProcess[l][i][j]); }  }
             
-            if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  return 0; }
+            if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); }  delete oligos; return 0; }
             
             if(allFiles){
-                map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
+                // so we don't add the same groupfile multiple times
                 map<string, string>::iterator it;
                 set<string> namesToRemove;
                 for(int i=0;i<fastaFileNames.size();i++){
@@ -481,11 +488,7 @@ int MakeContigsCommand::execute(){
                                 if(m->isBlank(fastaFileNames[i][j])){
                                     m->mothurRemove(fastaFileNames[i][j]);
                                     namesToRemove.insert(fastaFileNames[i][j]);
-                                }else{ 
-                                    it = uniqueFastaNames.find(fastaFileNames[i][j]);
-                                    if (it == uniqueFastaNames.end()) {        
-                                        uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i]; 
-                                    }  
+                                    uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
                                 }
                             }
                         }
@@ -502,8 +505,8 @@ int MakeContigsCommand::execute(){
                     m->openInputFile(it->first, in);
                     
                     ofstream out;
-                    string thisGroupName = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
-                    thisGroupName += getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
+                    variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(it->first));
+                    string thisGroupName = getOutputFileName("group",variables); outputNames.push_back(thisGroupName); outputTypes["group"].push_back(thisGroupName);
                     m->openOutputFile(thisGroupName, out);
                     
                     while (!in.eof()){
@@ -566,6 +569,7 @@ int MakeContigsCommand::execute(){
                 }
             }
             m->mothurOut("Done.\n");
+            delete oligos;
         }
         
         m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
@@ -822,7 +826,7 @@ int MakeContigsCommand::createProcesses(vector< vector<string> > files, string o
                 }
             }
                                 
-                       contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, barcodes, primers, tempFASTAFileNames, barcodeNameVector, primerNameVector, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
+                       contigsData* tempcontig = new contigsData(group, files[h], (outputFasta + extension), (outputScrapFasta + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, insert, deltaq, tempFASTAFileNames, oligosfile, reorient, pdiffs, bdiffs, tdiffs, createOligosGroup, createFileGroup, allFiles, trimOverlap, h);
                        pDataArray.push_back(tempcontig);
             
                        hThreadArray[h] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
@@ -942,7 +946,12 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
         m->openOutputFile(outputMisMatches, outMisMatch);
         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
         
-        TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, primers, barcodes);
+        TrimOligos trimOligos(pdiffs, bdiffs, 0, 0, oligos->getPairedPrimers(), oligos->getPairedBarcodes());
+        
+        TrimOligos* rtrimOligos = NULL;
+        if (reorient) {
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos->getReorientedPairedPrimers(), oligos->getReorientedPairedBarcodes()); numBarcodes = oligos->getReorientedPairedBarcodes().size();
+        }
         
         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
             
@@ -973,8 +982,15 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             
             int barcodeIndex = 0;
             int primerIndex = 0;
+            Sequence savedFSeq(fSeq.getName(), fSeq.getAligned());  Sequence savedRSeq(rSeq.getName(), rSeq.getAligned());
+            Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned());
+            QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL;
+            if (thisfqualfile != "") {
+                savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores());
+                savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores());
+            }
                         
-            if(barcodes.size() != 0){
+            if(numBarcodes != 0){
                 if (thisfqualfile != "") {
                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
@@ -988,7 +1004,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                 else{ currentSeqsDiffs += success;  }
             }
             
-            if(primers.size() != 0){
+            if(numFPrimers != 0){
                 if (thisfqualfile != "") {
                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
                 }else {
@@ -1000,6 +1016,58 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             
             if (currentSeqsDiffs > tdiffs)     {       trashCode += 't';   }
             
+            if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+                int thisSuccess = 0;
+                string thisTrashCode = "";
+                int thisCurrentSeqsDiffs = 0;
+                
+                int thisBarcodeIndex = 0;
+                int thisPrimerIndex = 0;
+                
+                if(numBarcodes != 0){
+                    if (thisfqualfile != "") {
+                        if ((thisfindexfile != "") || (thisrindexfile != "")) {
+                            thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex);
+                        }else {
+                            thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex);
+                        }
+                    }else {
+                        thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex);
+                    }
+                    if(thisSuccess > bdiffs)           {       thisTrashCode += 'b';   }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                }
+
+                if(numFPrimers != 0){
+                    if (thisfqualfile != "") {
+                        thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex);
+                    }else {
+                        thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex);
+                    }
+                    if(thisSuccess > pdiffs)           {       thisTrashCode += 'f';   }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                }
+                
+                if (thisCurrentSeqsDiffs > tdiffs)     {       thisTrashCode += 't';   }
+                
+                if (thisTrashCode == "") {
+                    trashCode = thisTrashCode;
+                    success = thisSuccess;
+                    currentSeqsDiffs = thisCurrentSeqsDiffs;
+                    barcodeIndex = thisBarcodeIndex;
+                    primerIndex = thisPrimerIndex;
+                    savedFSeq.reverseComplement();
+                    savedRSeq.reverseComplement();
+                    fSeq.setAligned(savedFSeq.getAligned());
+                    rSeq.setAligned(savedRSeq.getAligned());
+                    if(thisfqualfile != ""){
+                        savedFQual->flipQScores(); savedRQual->flipQScores();
+                        fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores());
+                    }
+                }else { trashCode += "(" + thisTrashCode + ")";  }
+            }
+
+            
             //flip the reverse reads
             rSeq.reverseComplement();
             if (thisfqualfile != "") { rQual->flipQScores(); }
@@ -1021,7 +1089,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             if (thisfqualfile != "") {
                 scores1 = fQual->getQualityScores();
                 scores2 = rQual->getQualityScores();
-                delete fQual; delete rQual;
+                delete fQual; delete rQual;  delete savedFQual; delete savedRQual;
             }
             
             // if (num < 5) {  cout << fSeq.getStartPos() << '\t' << fSeq.getEndPos() << '\t' << rSeq.getStartPos() << '\t' << rSeq.getEndPos() << endl; }
@@ -1088,18 +1156,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                 if (m->debug) { m->mothurOut(fSeq.getName()); }
                 
                 if (createOligosGroup) {
-                    if(barcodes.size() != 0){
-                        string thisGroup = barcodeNameVector[barcodeIndex];
-                        if (primers.size() != 0) { 
-                            if (primerNameVector[primerIndex] != "") { 
-                                if(thisGroup != "") {
-                                    thisGroup += "." + primerNameVector[primerIndex]; 
-                                }else {
-                                    thisGroup = primerNameVector[primerIndex]; 
-                                }
-                            } 
-                        }
-                        
+                        string thisGroup = oligos->getGroupName(barcodeIndex, primerIndex);
                         if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
                         
                         int pos = thisGroup.find("ignore");
@@ -1110,8 +1167,6 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                             if (it == groupCounts.end()) {     groupCounts[thisGroup] = 1; }
                             else { groupCounts[it->first] ++; }
                         }else { ignore = true; }
-                        
-                    }
                 }else if (createFileGroup) {
                     int pos = group.find("ignore");
                     if (pos == string::npos) {
@@ -1123,7 +1178,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
                     }else { ignore = true; }
                 }
                 if (m->debug) { m->mothurOut("\n"); }
-                
+            
                 if(allFiles && !ignore){
                     ofstream output;
                     m->openOutputFileAppend(fastaFileNames[barcodeIndex][primerIndex], output);
@@ -1159,6 +1214,7 @@ int MakeContigsCommand::driver(vector<string> files, string outputFasta, string
             inRQual.close();
         }
         delete alignment;
+        if (reorient) { delete rtrimOligos; }
         
         if (m->control_pressed) {  m->mothurRemove(outputFasta); m->mothurRemove(outputScrapFasta);m->mothurRemove(outputMisMatches);  }
     
@@ -1749,6 +1805,24 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
             
             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", forward = " + forward + ", reverse = " + reverse + ", forwardIndex = " + findex + ", reverseIndex = " + rindex + ".\n"); }
             
+            if (inputDir != "") {
+                string path = m->hasPath(forward);
+                if (path == "") {  forward = inputDir + forward;  }
+                
+                path = m->hasPath(reverse);
+                if (path == "") {  reverse = inputDir + reverse;  }
+                
+                if (findex != "") {
+                    path = m->hasPath(findex);
+                    if (path == "") {  findex = inputDir + findex;  }
+                }
+                
+                if (rindex != "") {
+                    path = m->hasPath(rindex);
+                    if (path == "") {  rindex = inputDir + rindex;  }
+                }
+            }
+            
             //check to make sure both are able to be opened
             ifstream in2;
             int openForward = m->openInputFile(forward, in2, "noerror");
@@ -1905,155 +1979,53 @@ vector< vector<string> > MakeContigsCommand::readFileNames(string filename){
 //illumina data requires paired forward and reverse data
 //BARCODE   atgcatgc   atgcatgc    groupName 
 //PRIMER   atgcatgc   atgcatgc    groupName  
-//PRIMER   atgcatgc   atgcatgc  
-bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname){
+//PRIMER   atgcatgc   atgcatgc
+bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, string rootname, map<string, string>& fastaFile2Group){
        try {
-               ifstream in;
-               m->openInputFile(oligosfile, in);
-               
-               ofstream test;
-               
-               string type, foligo, roligo, group;
+        if (m->debug) { m->mothurOut("[DEBUG]: oligosfile = " + oligosfile + "\n"); }
         
-               int indexPrimer = 0;
-               int indexBarcode = 0;
-        set<string> uniquePrimers;
-        set<string> uniqueBarcodes;
-               
-               while(!in.eof()){
-            
-                       in >> type; 
+        bool allBlank = false;
+        oligos->read(oligosfile, false);
+        
+        if (m->control_pressed) { return false; } //error in reading oligos
+        
+        if (oligos->hasPairedBarcodes()) {
+            numFPrimers = oligos->getPairedPrimers().size();
+            numBarcodes = oligos->getPairedBarcodes().size();
+        }else {
+            m->mothurOut("[ERROR]: make.contigs requires paired barcodes and primers. You can set one end to NONE if you are using an index file.\n"); m->control_pressed = true;
+        }
     
-                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
-            
-                       if(type[0] == '#'){
-                               while (!in.eof())       {       char c = in.get();  if (c == 10 || c == 13){    break;  }       } // get rest of line if there's any crap there
-                               m->gobble(in);
-                       }
-                       else{
-                               m->gobble(in);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               in >> foligo;
-                
-                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + foligo + ".\n"); }
-                               
-                               for(int i=0;i<foligo.length();i++){
-                                       foligo[i] = toupper(foligo[i]);
-                                       if(foligo[i] == 'U')    {       foligo[i] = 'T';        }
-                               }
-                               
-                               if(type == "PRIMER"){
-                                       m->gobble(in);
-                                       
-                    in >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    //roligo = reverseOligo(roligo);
-                    
-                    if (m->debug) { m->mothurOut("[DEBUG]: reading - " + roligo + ".\n"); }
-                    
-                    group = "";
-                    
-                                       // get rest of line in case there is a primer name
-                                       while (!in.eof())       {       
-                                               char c = in.get(); 
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       } 
-                    
-                    oligosPair newPrimer(foligo, roligo);
-                    
-                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-                    
-                                       //check for repeat barcodes
-                    string tempPair = foligo+roligo;
-                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
-                    else { uniquePrimers.insert(tempPair); }
-                                       
-                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
-                                       primers[indexPrimer]=newPrimer; indexPrimer++;          
-                                       primerNameVector.push_back(group);
-                               }else if(type == "BARCODE"){
-                                       m->gobble(in);
-                                       
-                    in >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    //roligo = reverseOligo(roligo);
-                    
-                    oligosPair newPair(foligo, roligo);
-                    
-                    if ((foligo == "NONE") || (roligo == "NONE")) { if (!noneOk) { m->mothurOut("[ERROR]: barcodes must be paired unless you are using an index file.\n"); m->control_pressed = true; } }
-                    
-                    group = "";
-                    while (!in.eof())  {       
-                                               char c = in.get(); 
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       } 
-                                       
-                    if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
-                        
-                    //check for repeat barcodes
-                    string tempPair = foligo+roligo;
-                    if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
-                    else { uniqueBarcodes.insert(tempPair); }
-                        
-                    barcodes[indexBarcode]=newPair; indexBarcode++;
-                                       barcodeNameVector.push_back(group);
-                               }else if(type == "LINKER"){
-                                       linker.push_back(foligo);
-                    m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n");
-                               }else if(type == "SPACER"){
-                                       spacer.push_back(foligo);
-                    m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n");
-                               }
-                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are primer, barcode, linker and spacer. Ignoring " + foligo + "."); m->mothurOutEndLine(); }
-                       }
-                       m->gobble(in);
-               }       
-               in.close();
-               
-               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
-               
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-            oligosPair temp("", "");
-                       barcodes[0] = temp;
-                       barcodeNameVector.push_back("");                        
-               }
-               
-               if(primerNameVector.size() == 0){
-            oligosPair temp("", "");
-                       primers[0] = temp;
-                       primerNameVector.push_back("");                 
-               }
-               
-               fastaFileNames.resize(barcodeNameVector.size());
+        if (m->control_pressed) { return false; }
+    
+        numLinkers = oligos->getLinkers().size();
+        numSpacers = oligos->getSpacers().size();
+        numRPrimers = oligos->getReversePrimers().size();
+        if (numLinkers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove linkers, ignoring.\n"); }
+        if (numSpacers != 0) { m->mothurOut("[WARNING]: make.contigs is not setup to remove spacers, ignoring.\n"); }
+       
+        vector<string> groupNames = oligos->getGroupNames();
+        if (groupNames.size() == 0) { allFiles = 0; allBlank = true;  }
+        
+        
+        fastaFileNames.resize(oligos->getBarcodeNames().size());
                for(int i=0;i<fastaFileNames.size();i++){
-                       fastaFileNames[i].assign(primerNameVector.size(), "");
+            for(int j=0;j<oligos->getPrimerNames().size();j++){  fastaFileNames[i].push_back(""); }
                }
-               
-               if(allFiles){
-                       set<string> uniqueNames; //used to cleanup outputFileNames
-                       for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
-                               for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-                                       
-                                       string primerName = primerNameVector[itPrimer->first];
-                                       string barcodeName = barcodeNameVector[itBar->first];
+    
+        if (allFiles) {
+            set<string> uniqueNames; //used to cleanup outputFileNames
+            map<int, oligosPair> barcodes = oligos->getPairedBarcodes();
+            map<int, oligosPair> primers = oligos->getPairedPrimers();
+            for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                     
-                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
-                                       else {
+                    string primerName = oligos->getPrimerName(itPrimer->first);
+                    string barcodeName = oligos->getBarcodeName(itBar->first);
+                    
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                    else {
                         string comboGroupName = "";
                         string fastaFileName = "";
                         string qualFileName = "";
@@ -2061,108 +2033,58 @@ bool MakeContigsCommand::getOligos(vector<vector<string> >& fastaFileNames, stri
                         string countFileName = "";
                         
                         if(primerName == ""){
-                            comboGroupName = barcodeNameVector[itBar->first];
-                        }
-                        else{
+                            comboGroupName = barcodeName;
+                        }else{
                             if(barcodeName == ""){
-                                comboGroupName = primerNameVector[itPrimer->first];
+                                comboGroupName = primerName;
                             }
                             else{
-                                comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                comboGroupName = barcodeName + "." + primerName;
                             }
                         }
                         
                         
                         ofstream temp;
-                        fastaFileName = rootname + comboGroupName + ".fasta";
+                        map<string, string> variables;
+                        variables["[filename]"] = rootname;
+                        variables["[tag]"] = comboGroupName;
+                        fastaFileName = getOutputFileName("fasta", variables);
                         if (uniqueNames.count(fastaFileName) == 0) {
                             outputNames.push_back(fastaFileName);
                             outputTypes["fasta"].push_back(fastaFileName);
                             uniqueNames.insert(fastaFileName);
+                            fastaFile2Group[fastaFileName] = comboGroupName;
                         }
                         
                         fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
                         m->openOutputFile(fastaFileName, temp);                temp.close();
+                        cout << fastaFileName << endl;
                     }
-                               }
-                       }
-               }
-               
-               bool allBlank = true;
-               for (int i = 0; i < barcodeNameVector.size(); i++) {
-                       if (barcodeNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-               for (int i = 0; i < primerNameVector.size(); i++) {
-                       if (primerNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-        
-               if (allBlank) {
-                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
-                       allFiles = false;
-                       return false;
-               }
-               
-               return true;
-               
+                }
+            }
+        }
+
+        if (allBlank) {
+            m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+            allFiles = false;
+            return false;
+        }
+
+        return true;
+
        }
        catch(exception& e) {
                m->errorOut(e, "MakeContigsCommand", "getOligos");
                exit(1);
        }
 }
-//********************************************************************/
-string MakeContigsCommand::reverseOligo(string oligo){
-       try {
-        string reverse = "";
-        
-        for(int i=oligo.length()-1;i>=0;i--){
-            
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
-            
-            else                                               {       reverse += 'N'; }
-        }
-        
-        
-        return reverse;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "MakeContigsCommand", "reverseOligo");
-               exit(1);
-       }
-}
 //**********************************************************************************************************************
 vector<int> MakeContigsCommand::convertQual(string qual) {
        try {
                vector<int> qualScores;
         bool negativeScores = false;
                
-               for (int i = 0; i < qual.length(); i++) { 
+               for (int i = 0; i < qual.length(); i++) {
             
             int temp = 0;
             temp = int(qual[i]);
index 1ea38358c7ec4c56e2d1adb926f7767301daef9d..67e86bc4ad124a55da7271ca7685cbea3f3946a5 100644 (file)
@@ -18,6 +18,7 @@
 #include "blastalign.hpp"
 #include "noalign.hpp"
 #include "trimoligos.h"
+#include "oligos.h"
 
 struct fastqRead {
        vector<int> scores;
@@ -62,18 +63,12 @@ public:
     void help() { m->mothurOut(getHelpString()); }     
     
 private:
-    bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk;
-    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format;
+    bool abort, allFiles, trimOverlap, createFileGroup, createOligosGroup, makeCount, noneOk, reorient;
+    string outputDir, ffastqfile, rfastqfile, align, oligosfile, rfastafile, ffastafile, rqualfile, fqualfile, findexfile, rindexfile, file, format, inputDir;
        float match, misMatch, gapOpen, gapExtend;
-       int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq;
+       int processors, longestBase, insert, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, deltaq, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers;
     vector<string> outputNames;
-    
-    map<int, oligosPair> barcodes;
-       map<int, oligosPair> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
-       vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;
+    Oligos* oligos;
        vector<char> convertTable;
     
        map<string, int> groupCounts; 
@@ -89,8 +84,7 @@ private:
     //bool checkReads(fastqRead&, fastqRead&, string, string);
     int createProcesses(vector< vector<string> >, string, string, string, vector<vector<string> >, int);
     int driver(vector<string>, string, string, string, vector<vector<string> >, int, string);
-    bool getOligos(vector<vector<string> >&, string);
-    string reverseOligo(string);
+    bool getOligos(vector<vector<string> >&, string, map<string, string>&);
     vector<pairFastqRead> getReads(bool ignoref, bool ignorer, fastqRead forward, fastqRead reverse, map<string, fastqRead>& uniques, bool);
     vector<pairFastqRead> mergeReads(vector<pairFastqRead> frReads, vector<pairFastqRead> friReads, map<string, pairFastqRead>& pairUniques);
 };
@@ -105,22 +99,19 @@ struct contigsData {
        string outputFasta; 
     string outputScrapFasta; 
        string outputMisMatches;
-       string align, group;
+       string align, group, oligosfile;
     vector<string> files;
     vector<vector<string> > fastaFileNames;
        MothurOut* m;
        float match, misMatch, gapOpen, gapExtend;
        int count, insert, threadID, pdiffs, bdiffs, tdiffs, deltaq;
-    bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap;
+    bool allFiles, createOligosGroup, createFileGroup, done, trimOverlap, reorient;
     map<string, int> groupCounts; 
     map<string, string> groupMap;
-    vector<string> primerNameVector;   
-       vector<string> barcodeNameVector;
-    map<int, oligosPair> barcodes;
-       map<int, oligosPair> primers;
+    
        
        contigsData(){}
-       contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, map<int, oligosPair> br, map<int, oligosPair> pr, vector<vector<string> > ffn, vector<string>bnv, vector<string> pnv, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
+       contigsData(string g, vector<string> f, string of, string osf, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int thr, int delt, vector<vector<string> > ffn, string olig, bool ro, int pdf, int bdf, int tdf, bool cg, bool cfg, bool all, bool to, int tid) {
         files = f;
                outputFasta = of;
         outputMisMatches = om;
@@ -135,10 +126,7 @@ struct contigsData {
                count = 0;
         outputScrapFasta = osf;
         fastaFileNames = ffn;
-        barcodes = br;
-        primers = pr;
-        barcodeNameVector = bnv;
-        primerNameVector = pnv;
+        oligosfile = olig;
         pdiffs = pdf;
         bdiffs = bdf;
         tdiffs = tdf;
@@ -148,6 +136,7 @@ struct contigsData {
         createFileGroup = cfg;
                threadID = tid;
         deltaq = delt;
+        reorient = ro;
         done=false;
        }
 };
@@ -204,7 +193,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
         
         outMisMatch << "Name\tLength\tOverlap_Length\tOverlap_Start\tOverlap_End\tMisMatches\tNum_Ns\n";  
         
-        TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->primers, pDataArray->barcodes);
+        Oligos oligos;
+        if (pDataArray->oligosfile != "") { oligos.read(pDataArray->oligosfile);  }
+        int numFPrimers = oligos.getPairedPrimers().size();
+        int numBarcodes = oligos.getPairedBarcodes().size();
+
+        
+        TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes());
+        TrimOligos* rtrimOligos = NULL;
+        if (pDataArray->reorient) {
+            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+        }
         
         while ((!inFFasta.eof()) && (!inRFasta.eof())) {
             
@@ -236,8 +235,15 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
 
             int barcodeIndex = 0;
             int primerIndex = 0;
+            Sequence savedFSeq(fSeq.getName(), fSeq.getAligned());  Sequence savedRSeq(rSeq.getName(), rSeq.getAligned());
+            Sequence savedFindex(findexBarcode.getName(), findexBarcode.getAligned()); Sequence savedRIndex(rindexBarcode.getName(), rindexBarcode.getAligned());
+            QualityScores* savedFQual = NULL; QualityScores* savedRQual = NULL;
+            if (thisfqualfile != "") {
+                savedFQual = new QualityScores(fQual->getName(), fQual->getQualityScores());
+                savedRQual = new QualityScores(rQual->getName(), rQual->getQualityScores());
+            }
             
-            if(pDataArray->barcodes.size() != 0){
+            if(numBarcodes != 0){
                 if (thisfqualfile != "") {
                     if ((thisfindexfile != "") || (thisrindexfile != "")) {
                         success = trimOligos.stripBarcode(findexBarcode, rindexBarcode, *fQual, *rQual, barcodeIndex);
@@ -251,7 +257,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
                 else{ currentSeqsDiffs += success;  }
             }
             
-            if(pDataArray->primers.size() != 0){
+            if(numFPrimers != 0){
                 if (thisfqualfile != "") {
                     success = trimOligos.stripForward(fSeq, rSeq, *fQual, *rQual, primerIndex);
                 }else {
@@ -263,6 +269,57 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             
             if (currentSeqsDiffs > pDataArray->tdiffs) {       trashCode += 't';   }
             
+            if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
+                int thisSuccess = 0;
+                string thisTrashCode = "";
+                int thisCurrentSeqsDiffs = 0;
+                
+                int thisBarcodeIndex = 0;
+                int thisPrimerIndex = 0;
+                
+                if(numBarcodes != 0){
+                    if (thisfqualfile != "") {
+                        if ((thisfindexfile != "") || (thisrindexfile != "")) {
+                            thisSuccess = rtrimOligos->stripBarcode(savedFindex, savedRIndex, *savedFQual, *savedRQual, thisBarcodeIndex);
+                        }else {
+                            thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisBarcodeIndex);
+                        }
+                    }else {
+                        thisSuccess = rtrimOligos->stripBarcode(savedFSeq, savedRSeq, thisBarcodeIndex);
+                    }
+                    if(thisSuccess > pDataArray->bdiffs)               {       thisTrashCode += 'b';   }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                }
+                
+                if(numFPrimers != 0){
+                    if (thisfqualfile != "") {
+                        thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, *savedFQual, *savedRQual, thisPrimerIndex);
+                    }else {
+                        thisSuccess = rtrimOligos->stripForward(savedFSeq, savedRSeq, thisPrimerIndex);
+                    }
+                    if(thisSuccess > pDataArray->pdiffs)               {       thisTrashCode += 'f';   }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                }
+                
+                if (thisCurrentSeqsDiffs > pDataArray->tdiffs) {       thisTrashCode += 't';   }
+                
+                if (thisTrashCode == "") {
+                    trashCode = thisTrashCode;
+                    success = thisSuccess;
+                    currentSeqsDiffs = thisCurrentSeqsDiffs;
+                    barcodeIndex = thisBarcodeIndex;
+                    primerIndex = thisPrimerIndex;
+                    savedFSeq.reverseComplement();
+                    savedRSeq.reverseComplement();
+                    fSeq.setAligned(savedFSeq.getAligned());
+                    rSeq.setAligned(savedRSeq.getAligned());
+                    if(thisfqualfile != ""){
+                        savedFQual->flipQScores(); savedRQual->flipQScores();
+                        fQual->setScores(savedFQual->getScores()); rQual->setScores(savedRQual->getScores());
+                    }
+                }else { trashCode += "(" + thisTrashCode + ")";  }
+            }
+            
             //flip the reverse reads
             rSeq.reverseComplement();
             if (thisfqualfile != "") { rQual->flipQScores(); }
@@ -284,7 +341,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             if (thisfqualfile != "") {
                 scores1 = fQual->getQualityScores();
                 scores2 = rQual->getQualityScores();
-                delete fQual; delete rQual;
+                delete fQual; delete rQual; delete savedFQual; delete savedRQual;
             }
             
             int overlapStart = fSeq.getStartPos();
@@ -344,29 +401,17 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             if(trashCode.length() == 0){
                 bool ignore = false;
                 if (pDataArray->createOligosGroup) {
-                    if(pDataArray->barcodes.size() != 0){
-                        string thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
-                        if (pDataArray->primers.size() != 0) { 
-                            if (pDataArray->primerNameVector[primerIndex] != "") { 
-                                if(thisGroup != "") {
-                                    thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
-                                }else {
-                                    thisGroup = pDataArray->primerNameVector[primerIndex]; 
-                                }
-                            } 
-                        }
-                        
-                        if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); }
-                        
-                        int pos = thisGroup.find("ignore");
-                        if (pos == string::npos) {
-                            pDataArray->groupMap[fSeq.getName()] = thisGroup; 
+                    string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);
+                    if (pDataArray->m->debug) { pDataArray->m->mothurOut(", group= " + thisGroup + "\n"); }
+                    
+                    int pos = thisGroup.find("ignore");
+                    if (pos == string::npos) {
+                        pDataArray->groupMap[fSeq.getName()] = thisGroup;
                         
-                            map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
-                            if (it == pDataArray->groupCounts.end()) { pDataArray->groupCounts[thisGroup] = 1; }
-                            else { pDataArray->groupCounts[it->first] ++; }
-                        }else { ignore = true; }
-                    }
+                        map<string, int>::iterator it = pDataArray->groupCounts.find(thisGroup);
+                        if (it == pDataArray->groupCounts.end()) {     pDataArray->groupCounts[thisGroup] = 1; }
+                        else { pDataArray->groupCounts[it->first] ++; }
+                    }else { ignore = true; }
                 }else if (pDataArray->createFileGroup) {
                     int pos = pDataArray->group.find("ignore");
                     if (pos == string::npos) {
@@ -414,6 +459,7 @@ static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){
             inRQual.close();
         }
         delete alignment;
+        if (pDataArray->reorient) { delete rtrimOligos; }
         
         pDataArray->done = true;
         if (pDataArray->m->control_pressed) {  pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);  pDataArray->m->mothurRemove(pDataArray->outputScrapFasta); }
index 102b44d57030561244c8ba6205f7c02c831b9b36..b4fc3ce1c41171c0cde75a83f6365e12873984bc 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -210,6 +210,16 @@ struct distlinePair {
        int end;
        
 };
+/************************************************************/
+struct oligosPair {
+       string forward;
+       string reverse;
+       
+       oligosPair() { forward = ""; reverse = "";  }
+       oligosPair(string f, string r) : forward(f), reverse(r) {}
+       ~oligosPair() {}
+};
+
 /************************************************************/
 struct seqPriorityNode {
        int numIdentical;
index baa7710eddc63901f570725af70b5d847918cb11..81436871d6fd14b8f40c7101b397c1637887bc96 100644 (file)
@@ -4157,6 +4157,7 @@ double MothurOut::min(vector<double>& featureVector){
                exit(1);
        }
 }
+
 /**************************************************************************************************/
 
 
index a57fb136ba3d41b210e671e2fe85848a4d40f359..0f34d0296020b8a3ebcf457084c10a80e25c27bb 100644 (file)
@@ -166,6 +166,7 @@ class MothurOut {
         bool isSubset(vector<string>, vector<string>); //bigSet, subset
         int checkName(string&);
         map<string, vector<string> > parseClasses(string);
+        
                
                //math operation
         double max(vector<double>&); //returns largest value in vector
diff --git a/oligos.cpp b/oligos.cpp
new file mode 100644 (file)
index 0000000..f058ad8
--- /dev/null
@@ -0,0 +1,556 @@
+//
+//  oligos.cpp
+//  Mothur
+//
+//  Created by Sarah Westcott on 4/4/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#include "oligos.h"
+
+/**************************************************************************************************/
+
+Oligos::Oligos(string o){
+       try {
+               m = MothurOut::getInstance();
+        hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
+        indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
+               oligosfile = o;
+        readOligos();
+               if (pairedOligos) {
+            numBarcodes = pairedBarcodes.size();
+            numFPrimers = pairedPrimers.size();
+        }else {
+            numBarcodes = barcodes.size();
+            numFPrimers = primers.size();
+        }
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "Oligos");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+
+Oligos::Oligos(){
+       try {
+               m = MothurOut::getInstance();
+        hasPPrimers = false; hasPBarcodes = false; pairedOligos = false; reversePairs = true;
+        indexBarcode = 0; indexPairedBarcode = 0; indexPrimer = 0; indexPairedPrimer = 0;
+        numFPrimers = 0; numBarcodes = 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "Oligos");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int Oligos::read(string o){
+       try {
+               oligosfile = o;
+        readOligos();
+        if (pairedOligos) {
+            numBarcodes = pairedBarcodes.size();
+            numFPrimers = pairedPrimers.size();
+        }else {
+            numBarcodes = barcodes.size();
+            numFPrimers = primers.size();
+        }
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "read");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
+int Oligos::read(string o, bool reverse){
+       try {
+               oligosfile = o;
+        reversePairs = reverse;
+        readOligos();
+        if (pairedOligos) {
+            numBarcodes = pairedBarcodes.size();
+            numFPrimers = pairedPrimers.size();
+        }else {
+            numBarcodes = barcodes.size();
+            numFPrimers = primers.size();
+        }
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "read");
+               exit(1);
+       }
+}
+//***************************************************************************************************************
+
+int Oligos::readOligos(){
+       try {
+               ifstream inOligos;
+               m->openInputFile(oligosfile, inOligos);
+               
+               string type, oligo, roligo, group;
+               
+               while(!inOligos.eof()){
+            
+                       inOligos >> type;
+            
+                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+            
+                       if(type[0] == '#'){
+                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
+                               m->gobble(inOligos);
+                       }
+                       else{
+                               m->gobble(inOligos);
+                               //make type case insensitive
+                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
+                               
+                               inOligos >> oligo;
+                
+                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
+                               
+                               for(int i=0;i<oligo.length();i++){
+                                       oligo[i] = toupper(oligo[i]);
+                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
+                               }
+                               
+                               if(type == "FORWARD"){
+                                       group = "";
+                                       
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                                       
+                                       //check for repeat barcodes
+                                       map<string, int>::iterator itPrime = primers.find(oligo);
+                                       if (itPrime != primers.end()) { m->mothurOut("[WARNING]: primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
+                    
+                                       primers[oligo]=indexPrimer; indexPrimer++;
+                                       primerNameVector.push_back(group);
+                               }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+                                       
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    if (reversePairs) {  roligo = reverseOligo(roligo); }
+                    group = "";
+                    
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                    
+                    oligosPair newPrimer(oligo, roligo);
+                    
+                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
+                                       
+                                       //check for repeat barcodes
+                    string tempPair = oligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
+                    else { uniquePrimers.insert(tempPair); }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
+                    
+                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+                                       primerNameVector.push_back(group);
+                    hasPPrimers = true;
+                }
+                               else if(type == "REVERSE"){
+                    string oligoRC = reverseOligo(oligo);
+                                       revPrimer.push_back(oligoRC);
+                               }
+                               else if(type == "BARCODE"){
+                                       inOligos >> group;
+                    
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                    
+                    string temp = "";
+                    while (!inOligos.eof())    {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13 || c == -1){     break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  temp += c;  }
+                                       }
+                                       
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {
+                        hasPBarcodes = true;
+                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+                        group = temp;
+                        
+                        for(int i=0;i<reverseBarcode.length();i++){
+                            reverseBarcode[i] = toupper(reverseBarcode[i]);
+                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
+                        }
+                        
+                        if (reversePairs) {   reverseBarcode = reverseOligo(reverseBarcode); }
+                        oligosPair newPair(oligo, reverseBarcode);
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        
+                        //check for repeat barcodes
+                        string tempPair = oligo+reverseBarcode;
+                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
+                        else { uniqueBarcodes.insert(tempPair); }
+                        
+                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }else {
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = barcodes.find(oligo);
+                        if (itBar != barcodes.end()) { m->mothurOut("[WARNING]: barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                        
+                        barcodes[oligo]=indexBarcode; indexBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }
+                               }else if(type == "LINKER"){
+                                       linker.push_back(oligo);
+                               }else if(type == "SPACER"){
+                                       spacer.push_back(oligo);
+                               }
+                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
+                       }
+                       m->gobble(inOligos);
+               }
+               inOligos.close();
+               
+        if (hasPBarcodes || hasPPrimers) {
+            pairedOligos = true;
+            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
+        }
+        
+        
+               //add in potential combos
+               if(barcodeNameVector.size() == 0){
+            if (pairedOligos) {
+                oligosPair newPair("", "");
+                pairedBarcodes[0] = newPair;
+            }else {
+                barcodes[""] = 0;
+            }
+                       barcodeNameVector.push_back("");
+               }
+               
+               if(primerNameVector.size() == 0){
+            if (pairedOligos) {
+                oligosPair newPair("", "");
+                pairedPrimers[0] = newPair;
+            }else {
+                primers[""] = 0;
+            }
+                       primerNameVector.push_back("");
+               }
+               
+               
+        if (pairedOligos) {
+            for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+                for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+                    
+                    string primerName = primerNameVector[itPrimer->first];
+                    string barcodeName = barcodeNameVector[itBar->first];
+                    
+                    if (m->debug) {  m->mothurOut("[DEBUG]: primerName = " + primerName + " barcodeName = " + barcodeName + "\n");  }
+                    
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { if (m->debug) {  m->mothurOut("[DEBUG]: in ignore. \n");  }  } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { if (m->debug) {  m->mothurOut("[DEBUG]: in blank. \n");  }  } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastqFileName = "";
+                        
+                        if(primerName == ""){
+                            comboGroupName = barcodeNameVector[itBar->first];
+                        }
+                        else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerNameVector[itPrimer->first];
+                            }
+                            else{
+                                comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                            }
+                        }
+                        
+                        if (m->debug) {  m->mothurOut("[DEBUG]: comboGroupName = " + comboGroupName +  "\n");  }
+                        
+                        uniqueNames.insert(comboGroupName);
+                        
+                        map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
+                        if (itGroup2Barcode == Group2Barcode.end()) {
+                            vector<string> tempBarcodes; tempBarcodes.push_back((itBar->second).forward+"."+(itBar->second).reverse);
+                            Group2Barcode[comboGroupName] = tempBarcodes;
+                        }else {
+                            Group2Barcode[comboGroupName].push_back((itBar->second).forward+"."+(itBar->second).reverse);
+                        }
+                        
+                        itGroup2Barcode = Group2Primer.find(comboGroupName);
+                        if (itGroup2Barcode == Group2Primer.end()) {
+                            vector<string> tempPrimers; tempPrimers.push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
+                            Group2Primer[comboGroupName] = tempPrimers;
+                        }else {
+                            Group2Primer[comboGroupName].push_back((itPrimer->second).forward+"."+(itPrimer->second).reverse);
+                        }
+                    }
+                }
+            }
+        }else {
+            for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                    
+                    string primerName = primerNameVector[itPrimer->second];
+                    string barcodeName = barcodeNameVector[itBar->second];
+                    
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastqFileName = "";
+                        
+                        if(primerName == ""){
+                            comboGroupName = barcodeNameVector[itBar->second];
+                        }
+                        else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerNameVector[itPrimer->second];
+                            }
+                            else{
+                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                            }
+                        }
+                        uniqueNames.insert(comboGroupName);
+                        
+                        map<string, vector<string> >::iterator itGroup2Barcode = Group2Barcode.find(comboGroupName);
+                        if (itGroup2Barcode == Group2Barcode.end()) {
+                            vector<string> tempBarcodes; tempBarcodes.push_back(itBar->first);
+                            Group2Barcode[comboGroupName] = tempBarcodes;
+                        }else {
+                            Group2Barcode[comboGroupName].push_back(itBar->first);
+                        }
+                        
+                        itGroup2Barcode = Group2Primer.find(comboGroupName);
+                        if (itGroup2Barcode == Group2Primer.end()) {
+                            vector<string> tempPrimers; tempPrimers.push_back(itPrimer->first);
+                            Group2Primer[comboGroupName] = tempPrimers;
+                        }else {
+                            Group2Primer[comboGroupName].push_back(itPrimer->first);
+                        }
+                    }
+                }
+            }
+        }
+        
+        
+        if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
+               
+        
+        Groups.clear();
+        for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) {  Groups.push_back(*it); }
+        
+        return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "readOligos");
+               exit(1);
+       }
+}
+//********************************************************************/
+vector<string> Oligos::getBarcodes(string groupName){
+       try {
+        vector<string> thisGroupsBarcodes;
+        
+        map<string, vector<string> >::iterator it = Group2Barcode.find(groupName);
+        
+        if (it == Group2Barcode.end()) {  m->mothurOut("[ERROR]: no barcodes found for group " + groupName + ".\n"); m->control_pressed=true;
+        }else { thisGroupsBarcodes = it->second; }
+        
+        return thisGroupsBarcodes;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "getBarcodes");
+               exit(1);
+       }
+}
+//********************************************************************/
+vector<string> Oligos::getPrimers(string groupName){
+       try {
+        vector<string> thisGroupsPrimers;
+        
+        map<string, vector<string> >::iterator it = Group2Primer.find(groupName);
+        
+        if (it == Group2Primer.end()) {  m->mothurOut("[ERROR]: no primers found for group " + groupName + ".\n"); m->control_pressed=true;
+        }else { thisGroupsPrimers = it->second; }
+        
+        return thisGroupsPrimers;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "getPrimers");
+               exit(1);
+       }
+}
+//********************************************************************/
+//can't have paired and unpaired so this function will either run the paired map or the unpaired
+map<int, oligosPair> Oligos::getReorientedPairedPrimers(){
+       try {
+        map<int, oligosPair> rpairedPrimers;
+        
+        for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
+            string forward = (it->second).reverse;
+            if (reversePairs) { forward = reverseOligo(forward); }
+            string reverse = (it->second).forward;
+            if (reversePairs) { reverse = reverseOligo(reverse); }
+            oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
+            rpairedPrimers[it->first] = tempPair;
+        }
+        
+        
+        for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
+            oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+            rpairedPrimers[it->second] = tempPair;
+        }
+        
+        return rpairedPrimers;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "getReorientedPairedPrimers");
+               exit(1);
+       }
+}
+//********************************************************************/
+//can't have paired and unpaired so this function will either run the paired map or the unpaired
+map<int, oligosPair> Oligos::getReorientedPairedBarcodes(){
+       try {
+        map<int, oligosPair> rpairedBarcodes;
+        
+        for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
+            string forward = (it->second).reverse;
+            if (reversePairs) { forward = reverseOligo(forward); }
+            string reverse = (it->second).forward;
+            if (reversePairs) { reverse = reverseOligo(reverse); }
+            oligosPair tempPair(forward, reverse); //reversePrimer, rc ForwardPrimer
+            rpairedBarcodes[it->first] = tempPair;
+        }
+        
+        for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
+            oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
+            rpairedBarcodes[it->second] = tempPair;
+        }
+
+        return rpairedBarcodes;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "getReorientedPairedBarcodes");
+               exit(1);
+       }
+}
+
+//********************************************************************/
+string Oligos::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "reverseOligo");
+               exit(1);
+       }
+}
+//********************************************************************/
+string Oligos::getBarcodeName(int index){
+       try {
+        string name = "";
+        
+        if ((index >= 0) && (index < barcodeNameVector.size())) { name = barcodeNameVector[index]; }
+            
+        return name;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "Oligos", "getBarcodeName");
+               exit(1);
+       }
+}
+//********************************************************************/
+string Oligos::getPrimerName(int index){
+    try {
+        string name = "";
+        
+        if ((index >= 0) && (index < primerNameVector.size())) { name = primerNameVector[index]; }
+        
+        return name;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Oligos", "getPrimerName");
+        exit(1);
+    }
+}
+//********************************************************************/
+string Oligos::getGroupName(int barcodeIndex, int primerIndex){
+    try {
+        
+        string thisGroup = "";
+            if(numBarcodes != 0){
+                thisGroup = getBarcodeName(barcodeIndex);
+                if (numFPrimers != 0) {
+                    if (getPrimerName(primerIndex) != "") {
+                        if(thisGroup != "") {
+                            thisGroup += "." + getPrimerName(primerIndex);
+                        }else {
+                            thisGroup = getPrimerName(primerIndex);
+                        }
+                    } 
+                }
+            }
+        
+        return thisGroup;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "Oligos", "getGroupName");
+        exit(1);
+    }
+}
+
+/**************************************************************************************************/
+
diff --git a/oligos.h b/oligos.h
new file mode 100644 (file)
index 0000000..d5bc303
--- /dev/null
+++ b/oligos.h
@@ -0,0 +1,92 @@
+//
+//  oligos.h
+//  Mothur
+//
+//  Created by Sarah Westcott on 4/4/14.
+//  Copyright (c) 2014 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_oligos_h
+#define Mothur_oligos_h
+
+
+#include "mothurout.h"
+
+
+/**************************************************************************************************/
+
+class Oligos {
+    
+public:
+       Oligos(string);
+       Oligos();
+    ~Oligos() {}
+    
+    int read(string);
+    int read(string, bool); //read without reversing the paired barcodes, for make.contigs.
+    bool hasPairedPrimers() { return hasPPrimers; }
+    bool hasPairedBarcodes() { return hasPBarcodes; }
+    
+    //for processing with trimOligos class
+    map<int, oligosPair> getPairedPrimers()                 { return pairedPrimers;     }
+    map<int, oligosPair> getPairedBarcodes()                { return pairedBarcodes;    }
+    map<string, int> getPrimers()                           { return primers;           }
+    map<string, int> getBarcodes()                          { return barcodes;          }
+    
+    map<int, oligosPair> getReorientedPairedPrimers();
+    map<int, oligosPair> getReorientedPairedBarcodes();
+    map<string, int> getReorientedPrimers();
+    map<string, int> getReorientedBarcodes();
+    
+    
+    vector<string> getLinkers()                             { return linker;            }
+    vector<string> getSpacers()                             { return spacer;            }
+    vector<string> getReversePrimers()                      { return revPrimer;         }
+    vector<string> getPrimerNames()                         { return primerNameVector;  }
+    vector<string> getBarcodeNames()                        { return barcodeNameVector; }
+    vector<string> getGroupNames()                          { return Groups;            }
+        
+    
+    //for printing and other formatting uses
+    vector<string> getBarcodes(string); //get barcodes for a group. For paired barcodes will return forward.reverse
+    vector<string> getPrimers(string); //get primers for a group. For paired primers will return forward.reverse
+    string getBarcodeName(int);
+    string getPrimerName(int);
+    string getGroupName(int, int);
+    
+               
+private:
+    
+    set<string> uniqueNames; 
+    vector<string> Groups;
+       vector<string> revPrimer;
+    map<string, vector<string> > Group2Barcode;
+    map<string, vector<string> > Group2Primer;
+    map<int, oligosPair> pairedBarcodes;
+    map<int, oligosPair> pairedPrimers;
+    map<string, int> primers;
+       map<string, int> barcodes;
+    vector<string>  linker;
+    vector<string>  spacer;
+       vector<string> primerNameVector;
+       vector<string> barcodeNameVector;
+    bool hasPPrimers, hasPBarcodes, pairedOligos, reversePairs;
+    string oligosfile;
+    int numBarcodes, numFPrimers;
+    MothurOut* m;
+    
+    int indexPrimer;
+    int indexBarcode;
+    int indexPairedPrimer;
+    int indexPairedBarcode;
+    set<string> uniquePrimers;
+    set<string> uniqueBarcodes;
+    
+    int readOligos();
+    string reverseOligo(string);
+};
+
+/**************************************************************************************************/
+
+
+#endif
index 2f6f5bbb4f99fbdf712cf5830e7e80e066d977d7..7202efdce3bc179d9d1236edd2fc31ae841abcd4 100644 (file)
@@ -16,6 +16,7 @@ vector<string> ParseFastaQCommand::setParameters(){
                CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pfastq);
         CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);
         CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
         CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs);
                CommandParameter pbdiffs("bdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pbdiffs);
         CommandParameter pldiffs("ldiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pldiffs);
@@ -51,6 +52,7 @@ string ParseFastaQCommand::getHelpString(){
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
                helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+        helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
                helpString += "The format parameter is used to indicate whether your sequences are sanger, solexa, illumina1.8+ or illumina, default=sanger.\n";
         helpString += "The fasta parameter allows you to indicate whether you want a fasta file generated. Default=T.\n";
         helpString += "The qfile parameter allows you to indicate whether you want a quality file generated. Default=T.\n";
@@ -211,6 +213,9 @@ ParseFastaQCommand::ParseFastaQCommand(string option){
                        }
 
                        if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
+            
+            temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
+                       reorient = m->isTrue(temp);
 
                }               
        }
@@ -235,14 +240,17 @@ int ParseFastaQCommand::execute(){
                if (fasta) { m->openOutputFile(fastaFile, outFasta);  outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);      }
                if (qual) { m->openOutputFile(qualFile, outQual);       outputNames.push_back(qualFile);  outputTypes["qfile"].push_back(qualFile);             }
         
-        TrimOligos* trimOligos = NULL;
-        int numBarcodes, numPrimers; numBarcodes = 0; numPrimers = 0;
+        TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;
+        pairedOligos = false; numBarcodes = 0; numPrimers= 0; numLinkers= 0; numSpacers = 0; numRPrimers = 0;
         if (oligosfile != "")       {
             readOligos(oligosfile);
-            numPrimers = primers.size(); numBarcodes = barcodes.size();
             //find group read belongs to
-            if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); numPrimers = pairedPrimers.size(); }
-            else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
+            if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); numBarcodes = oligos.getPairedBarcodes().size(); numPrimers = oligos.getPairedPrimers().size(); }
+            else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  numPrimers = oligos.getPrimers().size(); numBarcodes = oligos.getBarcodes().size();  }
+            
+            if (reorient) {
+                rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+            }
 
         }
         else if (groupfile != "")   { readGroup(groupfile);     }
@@ -290,7 +298,7 @@ int ParseFastaQCommand::execute(){
                 
                 if (split > 1) {
                     int barcodeIndex, primerIndex, trashCodeLength;
-                    if (oligosfile != "")      {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, numBarcodes, numPrimers);    }
+                    if (oligosfile != "")      {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, trimOligos, rtrimOligos, numBarcodes, numPrimers);    }
                     else if (groupfile != "")  {  trashCodeLength = findGroup(thisRead, barcodeIndex, primerIndex, "groupMode");   }
                     else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }
                     
@@ -322,7 +330,7 @@ int ParseFastaQCommand::execute(){
         if (split > 1) {
             
             if (groupfile != "")        { delete groupMap;      }
-            else if (oligosfile != "")  { delete trimOligos;    }
+            else if (oligosfile != "")  { delete trimOligos; if (reorient) { delete rtrimOligos; }   }
            
                        map<string, string>::iterator it;
                        set<string> namesToRemove;
@@ -460,7 +468,7 @@ vector<int> ParseFastaQCommand::convertQual(string qual) {
        }
 }
 //**********************************************************************************************************************
-int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, int numBarcodes, int numPrimers) {
+int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos, int numBarcodes, int numPrimers) {
        try {
         int success = 1;
         string trashCode = "";
@@ -469,7 +477,11 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer
         Sequence currSeq(thisRead.seq.getName(), thisRead.seq.getAligned());
         QualityScores currQual; currQual.setScores(convertQual(thisRead.quality));
         
-        if(linker.size() != 0){
+        //for reorient
+        Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
+        QualityScores savedQual(currQual.getName(), currQual.getScores());
+        
+        if(numLinkers != 0){
             success = trimOligos->stripLinker(currSeq, currQual);
             if(success > ldiffs)               {       trashCode += 'k';       }
             else{ currentSeqsDiffs += success;  }
@@ -482,7 +494,7 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer
             else{ currentSeqsDiffs += success;  }
         }
         
-        if(spacer.size() != 0){
+        if(numSpacers != 0){
             success = trimOligos->stripSpacer(currSeq, currQual);
             if(success > sdiffs)               {       trashCode += 's';       }
             else{ currentSeqsDiffs += success;  }
@@ -497,27 +509,49 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer
         
         if (currentSeqsDiffs > tdiffs) {       trashCode += 't';   }
         
-        if(revPrimer.size() != 0){
+        if(numRPrimers != 0){
             success = trimOligos->stripReverse(currSeq, currQual);
             if(!success)                               {       trashCode += 'r';       }
         }
         
-        if (trashCode.length() == 0) { //is this sequence in the ignore group
-            string thisGroup = "";
+        if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+            int thisSuccess = 0;
+            string thisTrashCode = "";
+            int thisCurrentSeqsDiffs = 0;
             
-            if(barcodes.size() != 0){
-                thisGroup = barcodeNameVector[barcode];
-                if (numPrimers != 0) {
-                    if (primerNameVector[primer] != "") {
-                        if(thisGroup != "") {
-                            thisGroup += "." + primerNameVector[primer];
-                        }else {
-                            thisGroup = primerNameVector[primer];
-                        }
-                    }
-                }
+            int thisBarcodeIndex = 0;
+            int thisPrimerIndex = 0;
+            //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+            if(numBarcodes != 0){
+                thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
+                if(thisSuccess > bdiffs)               { thisTrashCode += "b"; }
+                else{ thisCurrentSeqsDiffs += thisSuccess;  }
+            }
+            //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+            if(numPrimers != 0){
+                thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);
+                if(thisSuccess > pdiffs)               { thisTrashCode += "f"; }
+                else{ thisCurrentSeqsDiffs += thisSuccess;  }
             }
             
+            if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }
+            
+            if (thisTrashCode == "") {
+                trashCode = thisTrashCode;
+                success = thisSuccess;
+                currentSeqsDiffs = thisCurrentSeqsDiffs;
+                barcode = thisBarcodeIndex;
+                primer = thisPrimerIndex;
+                savedSeq.reverseComplement();
+                currSeq.setAligned(savedSeq.getAligned());
+                savedQual.flipQScores();
+                currQual.setScores(savedQual.getScores());
+            }else { trashCode += "(" + thisTrashCode + ")";  }
+        }
+        
+        if (trashCode.length() == 0) { //is this sequence in the ignore group
+            string thisGroup = oligos.getGroupName(barcode, primer);
+            
             int pos = thisGroup.find("ignore");
             if (pos != string::npos) {  trashCode += "i"; }
         }
@@ -538,13 +572,7 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer
         
         string group = groupMap->getGroup(thisRead.seq.getName());
         if (group == "not found") {     trashCode += "g";   } //scrap for group
-        else { //find file group
-            map<string, int>::iterator it = barcodes.find(group);
-            if (it != barcodes.end()) {
-                barcode = it->second;
-            }else { trashCode += "g"; }
-        }
-        
+                
         return trashCode.length();
     }
        catch(exception& e) {
@@ -556,276 +584,139 @@ int ParseFastaQCommand::findGroup(fastqRead2 thisRead, int& barcode, int& primer
 
 bool ParseFastaQCommand::readOligos(string oligoFile){
        try {
-               ifstream inOligos;
-               m->openInputFile(oligoFile, inOligos);
-               
-               string type, oligo, roligo, group;
-        bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
-        
-               int indexPrimer = 0;
-               int indexBarcode = 0;
-        int indexPairedPrimer = 0;
-               int indexPairedBarcode = 0;
-        set<string> uniquePrimers;
-        set<string> uniqueBarcodes;
-               
-               while(!inOligos.eof()){
-            
-                       inOligos >> type;
-            
-                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-            
-                       if(type[0] == '#'){
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-                               m->gobble(inOligos);
-                       }
-                       else{
-                               m->gobble(inOligos);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               inOligos >> oligo;
-                
-                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               
-                               if(type == "FORWARD"){
-                                       group = "";
-                                       
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                                       
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itPrime = primers.find(oligo);
-                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                       
-                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
-                    
-                                       primers[oligo]=indexPrimer; indexPrimer++;
-                                       primerNameVector.push_back(group);
-                               }
-                else if (type == "PRIMER"){
-                    m->gobble(inOligos);
-                                       
-                    inOligos >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    roligo = reverseOligo(roligo);
-                    
-                    group = "";
-                    
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                    
-                    oligosPair newPrimer(oligo, roligo);
-                    
-                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-                                       
-                                       //check for repeat barcodes
-                    string tempPair = oligo+roligo;
-                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
-                    else { uniquePrimers.insert(tempPair); }
-                                       
-                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
-                    
-                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
-                                       primerNameVector.push_back(group);
-                    hasPrimer = true;
-                }
-                               else if(type == "REVERSE"){
-                                       //Sequence oligoRC("reverse", oligo);
-                                       //oligoRC.reverseComplement();
-                    string oligoRC = reverseOligo(oligo);
-                                       revPrimer.push_back(oligoRC);
-                               }
-                               else if(type == "BARCODE"){
-                                       inOligos >> group;
-                    
-                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
-                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
-                    
-                    string temp = "";
-                    while (!inOligos.eof())    {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  temp += c;  }
-                                       }
-                                       
-                    //then this is illumina data with 4 columns
-                    if (temp != "") {
-                        hasPairedBarcodes = true;
-                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
-                        group = temp;
-                        
-                        for(int i=0;i<reverseBarcode.length();i++){
-                            reverseBarcode[i] = toupper(reverseBarcode[i]);
-                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
-                        }
-                        
-                        reverseBarcode = reverseOligo(reverseBarcode);
-                        oligosPair newPair(oligo, reverseBarcode);
-                        
-                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
-                        //check for repeat barcodes
-                        string tempPair = oligo+reverseBarcode;
-                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
-                        else { uniqueBarcodes.insert(tempPair); }
-                        
-                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
-                        barcodeNameVector.push_back(group);
-                    }else {
-                        //check for repeat barcodes
-                        map<string, int>::iterator itBar = barcodes.find(oligo);
-                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                        
-                        barcodes[oligo]=indexBarcode; indexBarcode++;
-                        barcodeNameVector.push_back(group);
-                    }
-                               }else if(type == "LINKER"){
-                                       linker.push_back(oligo);
-                               }else if(type == "SPACER"){
-                                       spacer.push_back(oligo);
-                               }
-                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
-                       }
-                       m->gobble(inOligos);
-               }
-               inOligos.close();
-               
-        if (hasPairedBarcodes || hasPrimer) {
+        bool allBlank = false;
+        oligos.read(oligosfile);
+        
+        if (m->control_pressed) { return false; } //error in reading oligos
+        
+        if (oligos.hasPairedBarcodes()) {
             pairedOligos = true;
-            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
+            numPrimers = oligos.getPairedPrimers().size();
+            numBarcodes = oligos.getPairedBarcodes().size();
+        }else {
+            pairedOligos = false;
+            numPrimers = oligos.getPrimers().size();
+            numBarcodes = oligos.getBarcodes().size();
         }
         
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-                       barcodes[""] = 0;
-                       barcodeNameVector.push_back("");
-               }
-               
-               if(primerNameVector.size() == 0){
-                       primers[""] = 0;
-                       primerNameVector.push_back("");
-               }
-               
-               fastqFileNames.resize(barcodeNameVector.size());
+        numLinkers = oligos.getLinkers().size();
+        numSpacers = oligos.getSpacers().size();
+        numRPrimers = oligos.getReversePrimers().size();
+        
+        vector<string> groupNames = oligos.getGroupNames();
+        if (groupNames.size() == 0) { allBlank = true;  }
+        
+        
+        fastqFileNames.resize(oligos.getBarcodeNames().size());
                for(int i=0;i<fastqFileNames.size();i++){
-                       fastqFileNames[i].assign(primerNameVector.size(), "");
+            for(int j=0;j<oligos.getPrimerNames().size();j++){  fastqFileNames[i].push_back(""); }
                }
-               
-               
-                       set<string> uniqueNames; //used to cleanup outputFileNames
-            if (pairedOligos) {
-                for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
-                    for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
-                        
-                        string primerName = primerNameVector[itPrimer->first];
-                        string barcodeName = barcodeNameVector[itBar->first];
+        
+        set<string> uniqueNames; //used to cleanup outputFileNames
+        if (pairedOligos) {
+            map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+            map<int, oligosPair> primers = oligos.getPairedPrimers();
+            for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                    
+                    string primerName = oligos.getPrimerName(itPrimer->first);
+                    string barcodeName = oligos.getBarcodeName(itBar->first);
+                    
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastaFileName = "";
+                        string qualFileName = "";
+                        string nameFileName = "";
+                        string countFileName = "";
                         
-                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
-                        else {
-                            string comboGroupName = "";
-                            string fastqFileName = "";
-                            
-                            if(primerName == ""){
-                                comboGroupName = barcodeNameVector[itBar->first];
+                        if(primerName == ""){
+                            comboGroupName = barcodeName;
+                        }else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerName;
                             }
                             else{
-                                if(barcodeName == ""){
-                                    comboGroupName = primerNameVector[itPrimer->first];
-                                }
-                                else{
-                                    comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
-                                }
+                                comboGroupName = barcodeName + "." + primerName;
                             }
-                            
-                            
-                            ofstream temp;
-                            map<string, string> variables;
-                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
-                            variables["[group]"] = comboGroupName;
-                            fastqFileName = getOutputFileName("fastq", variables);
-                            if (uniqueNames.count(fastqFileName) == 0) {
-                                outputNames.push_back(fastqFileName);
-                                outputTypes["fastq"].push_back(fastqFileName);
-                                uniqueNames.insert(fastqFileName);
-                            }
-                            
-                            fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
-                            m->openOutputFile(fastqFileName, temp);            temp.close();
-                            
                         }
+                        
+                        ofstream temp;
+                        map<string, string> variables;
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                        variables["[group]"] = comboGroupName;
+                        string fastqFileName = getOutputFileName("fastq", variables);
+                        if (uniqueNames.count(fastqFileName) == 0) {
+                            outputNames.push_back(fastqFileName);
+                            outputTypes["fastq"].push_back(fastqFileName);
+                            uniqueNames.insert(fastqFileName);
+                        }
+                        
+                        fastqFileNames[itBar->first][itPrimer->first] = fastqFileName;
+                        m->openOutputFile(fastqFileName, temp);                temp.close();
                     }
                 }
-            }else {
-                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
-                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-                        
-                        string primerName = primerNameVector[itPrimer->second];
-                        string barcodeName = barcodeNameVector[itBar->second];
+            }
+        }else {
+            map<string, int> barcodes = oligos.getBarcodes() ;
+            map<string, int> primers = oligos.getPrimers();
+            for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                    
+                    string primerName = oligos.getPrimerName(itPrimer->second);
+                    string barcodeName = oligos.getBarcodeName(itBar->second);
+                   
+                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                    else {
+                        string comboGroupName = "";
+                        string fastaFileName = "";
+                        string qualFileName = "";
+                        string nameFileName = "";
+                        string countFileName = "";
                         
-                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
-                        else {
-                            string comboGroupName = "";
-                            string fastqFileName = "";
-                            
-                            if(primerName == ""){
-                                comboGroupName = barcodeNameVector[itBar->second];
+                        if(primerName == ""){
+                            comboGroupName = barcodeName;
+                        }else{
+                            if(barcodeName == ""){
+                                comboGroupName = primerName;
                             }
                             else{
-                                if(barcodeName == ""){
-                                    comboGroupName = primerNameVector[itPrimer->second];
-                                }
-                                else{
-                                    comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
-                                }
+                                comboGroupName = barcodeName + "." + primerName;
                             }
-                            
-                            
-                            ofstream temp;
-                            map<string, string> variables;
-                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
-                            variables["[group]"] = comboGroupName;
-                            fastqFileName = getOutputFileName("fastq", variables);
-                            if (uniqueNames.count(fastqFileName) == 0) {
-                                outputNames.push_back(fastqFileName);
-                                outputTypes["fastq"].push_back(fastqFileName);
-                                uniqueNames.insert(fastqFileName);
-                            }
-                            
-                            fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
-                            m->openOutputFile(fastqFileName, temp);            temp.close();
-                            
                         }
+                        
+                        ofstream temp;
+                        map<string, string> variables;
+                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
+                        variables["[group]"] = comboGroupName;
+                        string fastqFileName = getOutputFileName("fastq", variables);
+                        if (uniqueNames.count(fastqFileName) == 0) {
+                            outputNames.push_back(fastqFileName);
+                            outputTypes["fastq"].push_back(fastqFileName);
+                            uniqueNames.insert(fastqFileName);
+                        }
+                        
+                        fastqFileNames[itBar->second][itPrimer->second] = fastqFileName;
+                        m->openOutputFile(fastqFileName, temp);                temp.close();
                     }
                 }
             }
-               
+        }
+        
+        if (allBlank) {
+            m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+            return false;
+        }
+       
         ofstream temp;
         map<string, string> variables;
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaQFile));
         variables["[group]"] = "scrap";
         noMatchFile = getOutputFileName("fastq", variables);
         m->openOutputFile(noMatchFile, temp);          temp.close();
-
+       
                return true;
                
        }
@@ -859,7 +750,6 @@ bool ParseFastaQCommand::readGroup(string groupfile){
                 ofstream temp;
                 m->openOutputFileBinary(thisFilename, temp); temp.close();
                 fastqFileNames[i].push_back(thisFilename);
-                barcodes[groups[i]] = i;
             }
         }
         
@@ -877,48 +767,6 @@ bool ParseFastaQCommand::readGroup(string groupfile){
                exit(1);
        }
 }
-//********************************************************************/
-string ParseFastaQCommand::reverseOligo(string oligo){
-       try {
-        string reverse = "";
-        
-        for(int i=oligo.length()-1;i>=0;i--){
-            
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
-            
-            else                                               {       reverse += 'N'; }
-        }
-        
-        
-        return reverse;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "ParseFastaQCommand", "reverseOligo");
-               exit(1);
-       }
-}
-
-
 //**********************************************************************************************************************
 
 
index 2ac141626b4b528d352d2860dc64e65826e22357..60c80f48f6f45015d5d493aa7b4e8c772ee920bd 100644 (file)
@@ -15,6 +15,7 @@
 #include "trimoligos.h"
 #include "sequence.hpp"
 #include "groupmap.h"
+#include "oligos.h"
 
 struct fastqRead2 {
     string quality;
@@ -50,16 +51,11 @@ private:
 
        vector<string> outputNames;     
        string outputDir, fastaQFile, format, oligosfile, groupfile;
-       bool abort, fasta, qual, pacbio, pairedOligos;
-    int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split;
+       bool abort, fasta, qual, pacbio, pairedOligos, reorient;
+    int pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, split, numBarcodes, numPrimers, numLinkers, numSpacers, numRPrimers;
     GroupMap* groupMap;
+    Oligos oligos;
     
-    //oligos file data structures
-    vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
-       map<string, int> barcodes;
-       map<string, int> primers;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
     vector<vector<string> > fastqFileNames;
     string noMatchFile;
        
@@ -67,9 +63,8 @@ private:
     vector<char> convertTable;
     bool readOligos(string oligosFile);
     bool readGroup(string oligosFile);
-    string reverseOligo(string oligo);
     fastqRead2 readFastq(ifstream&, bool&);
-    int findGroup(fastqRead2, int&, int&, TrimOligos*&, int, int);
+    int findGroup(fastqRead2, int&, int&, TrimOligos*&, TrimOligos*&, int, int);
     int findGroup(fastqRead2, int&, int&, string);
     
 };
index ba9062437d78df8aa337bc5948b603b1acf8e351..ad69961b80fb43bad986000cabb2efae71fcfe92 100644 (file)
@@ -16,6 +16,7 @@
 #include "alignment.hpp"
 #include "needlemanoverlap.hpp"
 #include "counttable.h"
+#include "oligos.h"
 
 class PcrSeqsCommand : public Command {
 public:
@@ -45,25 +46,23 @@ private:
     };
     
     vector<linePair> lines;
-       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
-    bool abort, keepprimer, keepdots, fileAligned;
+    bool abort, keepprimer, keepdots, fileAligned, pairedOligos;
        string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
-       int start, end, processors, length, pdiffs;
+       int start, end, processors, length, pdiffs, numFPrimers, numRPrimers;
+    Oligos oligos;
        
-    vector<string> revPrimer, outputNames;
-       map<string, int> primers;
+    vector<string> outputNames;
     
     int writeAccnos(set<string>);
     int readName(set<string>&);
     int readGroup(set<string>);
     int readTax(set<string>);
     int readCount(set<string>);
-    bool readOligos();
+    int readOligos();
     bool readEcoli();
        int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
        int createProcesses(string, string, string, set<string>&);
     bool isAligned(string, map<int, int>&);
-    string reverseOligo(string);
     int adjustDots(string, string, int, int);
     
 };
@@ -79,22 +78,17 @@ struct pcrData {
        unsigned long long fend;
        int count, start, end, length, pdiffs, pstart, pend;
        MothurOut* m;
-       map<string, int> primers;
-    vector<string> revPrimer;
     set<string> badSeqNames;
     bool keepprimer, keepdots, fileAligned, adjustNeeded;
        
-       
        pcrData(){}
-       pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
+       pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
                filename = f;
         goodFasta = gf;
         badFasta = bfn;
                m = mout;
         oligosfile = ol;
         ecolifile = ec;
-        primers = pr;
-        revPrimer = rpr;
         nomatch = nm;
         keepprimer = kp;
         keepdots = kd;
@@ -144,7 +138,26 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
         map<string, int> faked;
         set<int> locations; //locations = beginning locations
         
-        TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
+        Oligos oligos(pDataArray->oligosfile);
+        int numFPrimers, numRPrimers;
+        map<string, int> primers;
+        map<string, int> barcodes; //not used
+        vector<string> revPrimer;
+        if (oligos.hasPairedBarcodes()) {
+            numFPrimers = oligos.getPairedPrimers().size();
+            map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
+            for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
+                primers[(it->second).forward] = it->first;
+                revPrimer.push_back((it->second).reverse);
+            }
+        }else {
+            numFPrimers = oligos.getPrimers().size();
+            primers = oligos.getPrimers();
+            revPrimer = oligos.getReversePrimers();
+        }
+        numRPrimers = oligos.getReversePrimers().size();
+        
+        TrimOligos trim(pDataArray->pdiffs, 0, primers, barcodes, revPrimer);
                
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
             pDataArray->count++;
@@ -179,7 +192,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     ///////////////////////////////////////////////////////////////
                     
                     //process primers
-                    if (pDataArray->primers.size() != 0) {
+                    if (numFPrimers != 0) {
                         int primerStart = 0; int primerEnd = 0;
                         bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
@@ -224,7 +237,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     }
                     
                     //process reverse primers
-                    if (pDataArray->revPrimer.size() != 0) {
+                    if (numRPrimers != 0) {
                         int primerStart = 0; int primerEnd = 0;
                         bool good = trim.findReverse(currSeq, primerStart, primerEnd);
                          
@@ -327,7 +340,7 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
         
         if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
             if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
-            if (pDataArray->primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pDataArray->pstart = *it;  }
+            if (numFPrimers != 0)    {   set<int>::iterator it = locations.begin();  pDataArray->pstart = *it;  }
         }
         
         return 0;
index ee15bed0a2362a9f9921cd45789ebb83384c0ea5..722aca537d4b88ddc38c5e1442d348c5ba11dc03 100644 (file)
@@ -312,7 +312,7 @@ int PcrSeqsCommand::execute(){
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
         int start = time(NULL);
-        fileAligned = true;
+        fileAligned = true; pairedOligos = false;
         
         string thisOutputDir = outputDir;
                if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
@@ -326,7 +326,7 @@ int PcrSeqsCommand::execute(){
                
                
         length = 0;
-               if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(primers.size()) + ", revprimers = " + toString(revPrimer.size()) + ".\n"); } }  if (m->control_pressed) {  return 0; }
+               if(oligosfile != ""){    readOligos();     if (m->debug) { m->mothurOut("[DEBUG]: read oligos file. numprimers = " + toString(numFPrimers) + ", revprimers = " + toString(numRPrimers) + ".\n"); } }  if (m->control_pressed) {  return 0; }
         if(ecolifile != "") {    readEcoli();      }  if (m->control_pressed) {  return 0; }
         
         vector<unsigned long long> positions; 
@@ -529,7 +529,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
             
                        // Allocate memory for thread data.
-                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, primers, revPrimer, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
+                       pcrData* tempPcr = new pcrData(filename, goodFileName+extension, badFileName+extension, locationsFile+extension, m, oligosfile, ecolifile, nomatch, keepprimer, keepdots, start, end, length, pdiffs, lines[i].start, lines[i].end);
                        pDataArray.push_back(tempPcr);
                        
                        //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
@@ -586,8 +586,8 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string
                 
                 string name = "";
                 int thisStart = -1; int thisEnd = -1;
-                if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
-                if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
+                if (numFPrimers != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
+                if (numRPrimers != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
                 else { pend = -1; break; }
                 
                 int myDiff = 0;
@@ -642,8 +642,22 @@ int PcrSeqsCommand::driverPcr(string filename, string goodFasta, string badFasta
         set<int> locations; //locations[0] = beginning locations, 
         
         //pdiffs, bdiffs, primers, barcodes, revPrimers
-        map<string, int> faked;
-        TrimOligos trim(pdiffs, 0, primers, faked, revPrimer);
+        map<string, int> primers;
+        map<string, int> barcodes; //not used
+        vector<string> revPrimer;
+        if (pairedOligos) {
+            map<int, oligosPair> primerPairs = oligos.getPairedPrimers();
+            for (map<int, oligosPair>::iterator it = primerPairs.begin(); it != primerPairs.end(); it++) {
+                primers[(it->second).forward] = it->first;
+                revPrimer.push_back((it->second).reverse);
+            }
+            if (pdiffs != 0) { m->mothurOut("[WARNING]: Pcr.seqs is only designed to allow diffs in forward primers. Reverse primers must be an exact match.\n"); }
+        }else{
+            primers = oligos.getPrimers();
+            revPrimer = oligos.getReversePrimers();
+        }
+        
+        TrimOligos trim(pdiffs, 0, primers, barcodes, revPrimer);
         
                while (!done) {
             
@@ -871,8 +885,8 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i
             
             string name = "";
             int thisStart = -1; int thisEnd = -1;
-            if (primers.size() != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
-            if (revPrimer.size() != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
+            if (numFPrimers != 0)    { inLocations >> name >> thisStart; m->gobble(inLocations); }
+            if (numRPrimers != 0)  { inLocations >> name >> thisEnd;   m->gobble(inLocations); }
             
             
             //cout << seq.getName() << '\t' << thisStart << '\t' << thisEnd << '\t' << seq.getAligned().length() << endl;
@@ -926,130 +940,6 @@ int PcrSeqsCommand::adjustDots(string goodFasta, string locations, int pstart, i
         exit(1);
     }
 }
-//********************************************************************/
-string PcrSeqsCommand::reverseOligo(string oligo){
-       try {
-        string reverse = "";
-       
-        for(int i=oligo.length()-1;i>=0;i--){
-            
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
-
-            else                                               {       reverse += 'N'; }
-        }
-
-        
-        return reverse;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "reverseOligo");
-               exit(1);
-       }
-}
-
-//***************************************************************************************************************
-bool PcrSeqsCommand::readOligos(){
-       try {
-               ifstream inOligos;
-               m->openInputFile(oligosfile, inOligos);
-               
-               string type, oligo, group;
-        int primerCount = 0;
-               
-               while(!inOligos.eof()){
-            
-                       inOligos >> type; 
-            
-                       if(type[0] == '#'){ //ignore
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-                               m->gobble(inOligos);
-                       }else{
-                               m->gobble(inOligos);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               inOligos >> oligo;
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               
-                               if(type == "FORWARD"){
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {       
-                        char c = inOligos.get(); 
-                        if (c == 10 || c == 13 || c == -1){    break;  }
-                        else if (c == 32 || c == 9){;} //space or tab
-                                       } 
-                                       primers[oligo] = primerCount; primerCount++;
-                    //cout << "for oligo = " << oligo  << endl;
-                }else if(type == "REVERSE"){
-                    string oligoRC = reverseOligo(oligo);
-                    revPrimer.push_back(oligoRC);
-                    //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
-                               }else if(type == "BARCODE"){
-                    inOligos >> group;
-                }else if(type == "PRIMER"){
-                                       m->gobble(inOligos);
-                    primers[oligo] = primerCount; primerCount++;
-                                       
-                    string roligo="";
-                    inOligos >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    revPrimer.push_back(reverseOligo(roligo));
-                    
-                    // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                        char c = inOligos.get();
-                        if (c == 10 || c == 13 || c == -1){    break;  }
-                        else if (c == 32 || c == 9){;} //space or tab
-                                       }
-                    //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
-                               }else if((type == "LINKER")||(type == "SPACER")) {;}
-                               else{   m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
-                       }
-                       m->gobble(inOligos);
-               }       
-               inOligos.close();
-               
-               if ((primers.size() == 0) && (revPrimer.size() == 0)) {
-                       m->mothurOut("[ERROR]: your oligos file does not contain valid primers or reverse primers.  Please correct."); m->mothurOutEndLine();
-            m->control_pressed = true;
-                       return false;
-               }
-        
-        return true;
-        
-    }catch(exception& e) {
-               m->errorOut(e, "PcrSeqsCommand", "readOligos");
-               exit(1);
-       }
-}
 //***************************************************************************************************************
 bool PcrSeqsCommand::readEcoli(){
        try {
@@ -1321,6 +1211,35 @@ int PcrSeqsCommand::readCount(set<string> badSeqNames){
                exit(1);
        }
 }
+//***************************************************************************************************************
+
+int PcrSeqsCommand::readOligos(){
+       try {
+        oligos.read(oligosfile);
+        
+        if (m->control_pressed) { return false; } //error in reading oligos
+        
+        if (oligos.hasPairedBarcodes()) {
+            pairedOligos = true;
+            numFPrimers = oligos.getPairedPrimers().size();
+        }else {
+            pairedOligos = false;
+            numFPrimers = oligos.getPrimers().size();
+        }
+        numRPrimers = oligos.getReversePrimers().size();
+        
+        if (oligos.getLinkers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove linkers, ignoring.\n"); }
+        if (oligos.getSpacers().size() != 0) { m->mothurOut("[WARNING]: pcr.seqs is not setup to remove spacers, ignoring.\n"); }
+
+        return true;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "PcrSeqsCommand", "readOligos");
+               exit(1);
+       }
+}
+
 /**************************************************************************************/
 
 
index 3a1687bf9e174d77ddded9eaf331d6f1a78da734..916dab3a59743e4bcb860bd9891dcfbda74504b4 100644 (file)
@@ -23,7 +23,19 @@ QualityScores::QualityScores(){
                exit(1);
        }                                                       
 }
+/**************************************************************************************************/
 
+QualityScores::QualityScores(string n, vector<int> s){
+       try {
+               m = MothurOut::getInstance();
+               setName(n);
+        setScores(s);
+       }
+       catch(exception& e) {
+               m->errorOut(e, "QualityScores", "QualityScores");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
 QualityScores::QualityScores(ifstream& qFile){
@@ -344,10 +356,9 @@ double QualityScores::calculateAverage(bool logTransform){
         if (logTransform)   {  aveQScore += pow(10.0, qScores[i]);  }
         else                {  aveQScore += qScores[i];             }
        }
-       aveQScore /= (double) seqLength;
     
-    if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);  }
-    else                {  aveQScore /= (double) seqLength;                 }
+    if (logTransform)   {  aveQScore = log10(aveQScore /(double) seqLength);    }
+    else                {  aveQScore /= (double) seqLength;                     }
        
        return aveQScore;
 }
@@ -366,6 +377,8 @@ bool QualityScores::cullQualAverage(Sequence& sequence, double qAverage, bool lo
                        
                double aveQScore = calculateAverage(logTransform);
                
+        if (m->debug) { m->mothurOut("[DEBUG]: " + sequence.getName() + " average = " + toString(aveQScore) + "\n"); }
+        
                if(aveQScore >= qAverage)       {       success = 1;    }
                else                                            {       success = 0;    }
                
index a802636fd364071ab15aa84c148ebb38fead14b4..756b36ffce2cd217f46a6ca4c31060b897a02439 100644 (file)
@@ -22,6 +22,7 @@
 class QualityScores {
 public:
        QualityScores();
+    QualityScores(string n, vector<int> qs);
        QualityScores(ifstream&);
        string getName();
        int getLength(){    return (int)qScores.size();  }
index 26d48320e685d28e94e8e5dc417e875484710b13..dc8eb54abb749bffe4d0ffab55e53331f1933b32 100755 (executable)
@@ -18,6 +18,7 @@ vector<string> SffInfoCommand::setParameters(){
        try {           \r
                CommandParameter psff("sff", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(psff);\r
         CommandParameter poligos("oligos", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(poligos);\r
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);\r
         CommandParameter pgroup("group", "InputTypes", "", "", "oligosGroup", "none", "none","",false,false); parameters.push_back(pgroup);\r
                CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);\r
                CommandParameter psfftxt("sfftxt", "String", "", "", "", "", "","",false,false); parameters.push_back(psfftxt);\r
@@ -47,7 +48,7 @@ string SffInfoCommand::getHelpString(){
        try {\r
                string helpString = "";\r
                helpString += "The sffinfo command reads a sff file and extracts the sequence data, or you can use it to parse a sfftxt file.\n";\r
-               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs and trim. sff is required. \n";\r
+               helpString += "The sffinfo command parameters are sff, fasta, qfile, accnos, flow, sfftxt, oligos, group, bdiffs, tdiffs, ldiffs, sdiffs, pdiffs, checkorient and trim. sff is required. \n";\r
                helpString += "The sff parameter allows you to enter the sff file you would like to extract data from.  You may enter multiple files by separating them by -'s.\n";\r
                helpString += "The fasta parameter allows you to indicate if you would like a fasta formatted file generated.  Default=True. \n";\r
                helpString += "The qfile parameter allows you to indicate if you would like a quality file generated.  Default=True. \n";\r
@@ -58,6 +59,7 @@ string SffInfoCommand::getHelpString(){
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";\r
         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";\r
                helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";\r
+        helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";\r
                helpString += "The flow parameter allows you to indicate if you would like a flowgram file generated.  Default=True. \n";\r
                helpString += "The sfftxt parameter allows you to indicate if you would like a sff.txt file generated.  Default=False. \n";\r
                helpString += "If you want to parse an existing sfftxt file into flow, fasta and quality file, enter the file name using the sfftxt parameter. \n";\r
@@ -492,6 +494,8 @@ SffInfoCommand::SffInfoCommand(string option)  {
                                else {  m->mothurOut("[ERROR]: you must provide a valid sff or sfftxt file."); m->mothurOutEndLine(); abort=true;  }\r
                        }\r
             \r
+            temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }\r
+                       reorient = m->isTrue(temp);\r
             \r
                }\r
        }\r
@@ -563,13 +567,22 @@ int SffInfoCommand::execute(){
 //**********************************************************************************************************************\r
 int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){\r
        try {\r
+        oligosObject = new Oligos();\r
                currentFileName = input;\r
                if (outputDir == "") {  outputDir += m->hasPath(input); }\r
                \r
                if (accnos != "")       {  readAccnosFile(accnos);  }\r
                else                            {       seqNames.clear();               }\r
         \r
-        if (hasOligos)   {   readOligos(oligos);    split = 2;      }\r
+        TrimOligos* trimOligos = NULL; TrimOligos* rtrimOligos = NULL;\r
+        if (hasOligos)   {\r
+            readOligos(oligos);    split = 2;\r
+            if (m->control_pressed) { delete oligosObject; return 0; }\r
+            trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligosObject->getPrimers(), oligosObject->getBarcodes(), oligosObject->getReversePrimers(), oligosObject->getLinkers(), oligosObject->getSpacers());  numFPrimers = oligosObject->getPrimers().size(); numBarcodes = oligosObject->getBarcodes().size();\r
+            if (reorient) {\r
+                rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligosObject->getReorientedPairedPrimers(), oligosObject->getReorientedPairedBarcodes()); numBarcodes = oligosObject->getReorientedPairedBarcodes().size();\r
+            }\r
+        }\r
         if (hasGroup)    {   readGroup(oligos);     split = 2;      }\r
         \r
                ofstream outSfftxt, outFasta, outQual, outFlow;\r
@@ -599,8 +612,8 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                int count = 0;\r
                \r
                //check magic number and version\r
-               if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); return count; }\r
-               if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); return count; }\r
+               if (header.magicNumber != 779314790) { m->mothurOut("Magic Number is not correct, not a valid .sff file"); m->mothurOutEndLine(); delete oligosObject; if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } } return count; }\r
+               if (header.version != "0001") { m->mothurOut("Version is not supported, only support version 0001."); m->mothurOutEndLine(); delete oligosObject; if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } } return count; }\r
        \r
                //print common header\r
                if (sfftxt) {   printCommonHeader(outSfftxt, header);           }\r
@@ -613,7 +626,7 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
                                                \r
                        //read data\r
                        seqRead read;  Header readheader;\r
-            readSeqData(in, read, header.numFlowsPerRead, readheader);\r
+            readSeqData(in, read, header.numFlowsPerRead, readheader, trimOligos, rtrimOligos);\r
             \r
             bool okay = sanityCheck(readheader, read);\r
             if (!okay) { break; }\r
@@ -700,6 +713,9 @@ int SffInfoCommand::extractSffInfo(string input, string accnos, string oligos){
             else { outputNames.push_back(noMatchFile); outputTypes["sff"].push_back(noMatchFile); }\r
         }\r
         \r
+        delete oligosObject;\r
+        if (hasOligos)   { delete trimOligos; if (reorient) { delete  rtrimOligos; } }\r
+        \r
                return count;\r
        }\r
        catch(exception& e) {\r
@@ -1036,7 +1052,7 @@ int SffInfoCommand::adjustCommonHeader(CommonHeader header){
        }\r
 }\r
 //**********************************************************************************************************************\r
-bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header){\r
+bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads, Header& header, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos){\r
        try {\r
         unsigned long long startSpotInFile = in.tellg();\r
                if (!in.eof()) {\r
@@ -1144,7 +1160,7 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads,
                \r
                 int barcodeIndex, primerIndex, trashCodeLength;\r
                 \r
-                if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex);                }\r
+                if (hasOligos)      {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, trimOligos, rtrimOligos);                }\r
                 else if (hasGroup)  {  trashCodeLength = findGroup(header, read, barcodeIndex, primerIndex, "groupMode");   }\r
                 else {  m->mothurOut("[ERROR]: uh oh, we shouldn't be here...\n"); }\r
 \r
@@ -1189,10 +1205,8 @@ bool SffInfoCommand::readSeqData(ifstream& in, seqRead& read, int numFlowReads,
        }\r
 }\r
 //**********************************************************************************************************************\r
-int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer) {\r
+int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*& trimOligos, TrimOligos*& rtrimOligos) {\r
        try {\r
-        //find group read belongs to\r
-        TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);\r
         \r
         int success = 1;\r
         string trashCode = "";\r
@@ -1229,55 +1243,81 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         Sequence currSeq(header.name, seq);\r
         QualityScores currQual;\r
         \r
+        //for reorient\r
+        Sequence savedSeq(currSeq.getName(), currSeq.getAligned());\r
+        QualityScores savedQual(currQual.getName(), currQual.getScores());\r
+        \r
         if(numLinkers != 0){\r
-            success = trimOligos.stripLinker(currSeq, currQual);\r
+            success = trimOligos->stripLinker(currSeq, currQual);\r
             if(success > ldiffs)               {       trashCode += 'k';       }\r
             else{ currentSeqsDiffs += success;  }\r
             \r
         }\r
         \r
-        if(barcodes.size() != 0){\r
-            success = trimOligos.stripBarcode(currSeq, currQual, barcode);\r
+        if(numBarcodes != 0){\r
+            success = trimOligos->stripBarcode(currSeq, currQual, barcode);\r
             if(success > bdiffs)               {       trashCode += 'b';       }\r
             else{ currentSeqsDiffs += success;  }\r
         }\r
         \r
         if(numSpacers != 0){\r
-            success = trimOligos.stripSpacer(currSeq, currQual);\r
+            success = trimOligos->stripSpacer(currSeq, currQual);\r
             if(success > sdiffs)               {       trashCode += 's';       }\r
             else{ currentSeqsDiffs += success;  }\r
             \r
         }\r
         \r
         if(numFPrimers != 0){\r
-            success = trimOligos.stripForward(currSeq, currQual, primer, true);\r
+            success = trimOligos->stripForward(currSeq, currQual, primer, true);\r
             if(success > pdiffs)               {       trashCode += 'f';       }\r
             else{ currentSeqsDiffs += success;  }\r
         }\r
         \r
         if (currentSeqsDiffs > tdiffs) {       trashCode += 't';   }\r
         \r
-        if(revPrimer.size() != 0){\r
-            success = trimOligos.stripReverse(currSeq, currQual);\r
+        if(numRPrimers != 0){\r
+            success = trimOligos->stripReverse(currSeq, currQual);\r
             if(!success)                               {       trashCode += 'r';       }\r
         }\r
 \r
-        if (trashCode.length() == 0) { //is this sequence in the ignore group\r
-            string thisGroup = "";\r
+        if (reorient && (trashCode != "")) { //if you failed and want to check the reverse\r
+            int thisSuccess = 0;\r
+            string thisTrashCode = "";\r
+            int thisCurrentSeqsDiffs = 0;\r
             \r
-            if(barcodes.size() != 0){\r
-                thisGroup = barcodeNameVector[barcode];\r
-                if (numFPrimers != 0) {\r
-                    if (primerNameVector[primer] != "") {\r
-                        if(thisGroup != "") {\r
-                            thisGroup += "." + primerNameVector[primer];\r
-                        }else {\r
-                            thisGroup = primerNameVector[primer];\r
-                        }\r
-                    }\r
-                }\r
+            int thisBarcodeIndex = 0;\r
+            int thisPrimerIndex = 0;\r
+            //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+            if(numBarcodes != 0){\r
+                thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);\r
+                if(thisSuccess > bdiffs)               { thisTrashCode += "b"; }\r
+                else{ thisCurrentSeqsDiffs += thisSuccess;  }\r
+            }\r
+            //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;\r
+            if(numFPrimers != 0){\r
+                thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, true);\r
+                if(thisSuccess > pdiffs)               { thisTrashCode += "f"; }\r
+                else{ thisCurrentSeqsDiffs += thisSuccess;  }\r
             }\r
             \r
+            if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }\r
+            \r
+            if (thisTrashCode == "") {\r
+                trashCode = thisTrashCode;\r
+                success = thisSuccess;\r
+                currentSeqsDiffs = thisCurrentSeqsDiffs;\r
+                barcode = thisBarcodeIndex;\r
+                primer = thisPrimerIndex;\r
+                savedSeq.reverseComplement();\r
+                currSeq.setAligned(savedSeq.getAligned());\r
+                savedQual.flipQScores();\r
+                currQual.setScores(savedQual.getScores());\r
+            }else { trashCode += "(" + thisTrashCode + ")";  }\r
+        }\r
+\r
+        if (trashCode.length() == 0) { //is this sequence in the ignore group\r
+            string thisGroup = oligosObject->getGroupName(barcode, primer);\r
+            \r
             int pos = thisGroup.find("ignore");\r
             if (pos != string::npos) {  trashCode += "i"; }\r
         }\r
@@ -1297,12 +1337,6 @@ int SffInfoCommand::findGroup(Header header, seqRead read, int& barcode, int& pr
         \r
         string group = groupMap->getGroup(header.name);\r
         if (group == "not found") {     trashCode += "g";   } //scrap for group\r
-        else { //find file group\r
-            map<string, int>::iterator it = barcodes.find(group);\r
-            if (it != barcodes.end()) {\r
-                barcode = it->second;\r
-            }else { trashCode += "g"; }\r
-        }\r
         \r
         return trashCode.length();\r
     }\r
@@ -1833,121 +1867,59 @@ bool SffInfoCommand::readOligos(string oligoFile){
         numSplitReads.clear();\r
         filehandlesHeaders.clear();\r
         \r
-               ifstream inOligos;\r
-               m->openInputFile(oligoFile, inOligos);\r
-               \r
-               string type, oligo, group;\r
+               bool allBlank = false;\r
+        oligosObject->read(oligoFile);\r
         \r
-               int indexPrimer = 0;\r
-               int indexBarcode = 0;\r
-               \r
-               while(!inOligos.eof()){\r
-            \r
-                       inOligos >> type;\r
-            \r
-                       if(type[0] == '#'){\r
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there\r
-                               m->gobble(inOligos);\r
-                       }\r
-                       else{\r
-                               m->gobble(inOligos);\r
-                               //make type case insensitive\r
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }\r
-                               \r
-                               inOligos >> oligo;\r
-                               \r
-                               for(int i=0;i<oligo.length();i++){\r
-                                       oligo[i] = toupper(oligo[i]);\r
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }\r
-                               }\r
-                               \r
-                               if(type == "FORWARD"){\r
-                                       group = "";\r
-                                       \r
-                                       // get rest of line in case there is a primer name\r
-                                       while (!inOligos.eof()) {\r
-                                               char c = inOligos.get();\r
-                                               if (c == 10 || c == 13 || c == -1){     break;  }\r
-                                               else if (c == 32 || c == 9){;} //space or tab\r
-                                               else {  group += c;  }\r
-                                       }\r
-                                       \r
-                                       //check for repeat barcodes\r
-                                       map<string, int>::iterator itPrime = primers.find(oligo);\r
-                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
-                                       \r
-                                       primers[oligo]=indexPrimer; indexPrimer++;\r
-                                       primerNameVector.push_back(group);\r
-                   \r
-                               }else if(type == "REVERSE"){\r
-                                       //Sequence oligoRC("reverse", oligo);\r
-                                       //oligoRC.reverseComplement();\r
-                    string oligoRC = reverseOligo(oligo);\r
-                                       revPrimer.push_back(oligoRC);\r
-                               }\r
-                               else if(type == "BARCODE"){\r
-                                       inOligos >> group;\r
-                    \r
-                                       \r
-                                       //check for repeat barcodes\r
-                                       map<string, int>::iterator itBar = barcodes.find(oligo);\r
-                                       if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }\r
-                    \r
-                                       barcodes[oligo]=indexBarcode; indexBarcode++;\r
-                                       barcodeNameVector.push_back(group);\r
-                               }else if(type == "LINKER"){\r
-                                       linker.push_back(oligo);\r
-                               }else if(type == "SPACER"){\r
-                                       spacer.push_back(oligo);\r
-                               }\r
-                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }\r
-                       }\r
-                       m->gobble(inOligos);\r
-               }\r
-               inOligos.close();\r
-    \r
-               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ split = 1;      }\r
-    \r
-               //add in potential combos\r
-               if(barcodeNameVector.size() == 0){\r
-                       barcodes[""] = 0;\r
-                       barcodeNameVector.push_back("");\r
-               }\r
-               \r
-               if(primerNameVector.size() == 0){\r
-                       primers[""] = 0;\r
-                       primerNameVector.push_back("");\r
-               }\r
-               \r
-               filehandles.resize(barcodeNameVector.size());\r
+        if (m->control_pressed) { return false; } //error in reading oligos\r
+        \r
+        if (oligosObject->hasPairedBarcodes()) {\r
+            pairedOligos = true;\r
+            m->mothurOut("[ERROR]: sffinfo does not support paired barcodes and primers, aborting.\n"); m->control_pressed = true; return true;\r
+        }else {\r
+            pairedOligos = false;\r
+            numFPrimers = oligosObject->getPrimers().size();\r
+            numBarcodes = oligosObject->getBarcodes().size();\r
+        }\r
+        \r
+        numLinkers = oligosObject->getLinkers().size();\r
+        numSpacers = oligosObject->getSpacers().size();\r
+        numRPrimers = oligosObject->getReversePrimers().size();\r
+        \r
+        vector<string> groupNames = oligosObject->getGroupNames();\r
+        if (groupNames.size() == 0) { allBlank = true;  }\r
+        \r
+        filehandles.resize(oligosObject->getBarcodeNames().size());\r
                for(int i=0;i<filehandles.size();i++){\r
-                       filehandles[i].assign(primerNameVector.size(), "");\r
+            for(int j=0;j<oligosObject->getPrimerNames().size();j++){  filehandles[i].push_back(""); }\r
                }\r
         \r
-               if(split > 1){\r
-                       set<string> uniqueNames; //used to cleanup outputFileNames\r
-                       for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
-                               for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
-                                       \r
-                                       string primerName = primerNameVector[itPrimer->second];\r
-                                       string barcodeName = barcodeNameVector[itBar->second];\r
-                                       \r
+        if(split > 1){\r
+            set<string> uniqueNames; //used to cleanup outputFileNames\r
+            map<string, int> barcodes = oligosObject->getBarcodes() ;\r
+            map<string, int> primers = oligosObject->getPrimers();\r
+            for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){\r
+                for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){\r
+                    \r
+                    string primerName = oligosObject->getPrimerName(itPrimer->second);\r
+                    string barcodeName = oligosObject->getBarcodeName(itBar->second);\r
+                    \r
                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing\r
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing\r
                     else {\r
                         string comboGroupName = "";\r
                         string fastaFileName = "";\r
                         string qualFileName = "";\r
                         string nameFileName = "";\r
+                        string countFileName = "";\r
                         \r
                         if(primerName == ""){\r
-                            comboGroupName = barcodeNameVector[itBar->second];\r
-                        }\r
-                        else{\r
+                            comboGroupName = barcodeName;\r
+                        }else{\r
                             if(barcodeName == ""){\r
-                                comboGroupName = primerNameVector[itPrimer->second];\r
+                                comboGroupName = primerName;\r
                             }\r
                             else{\r
-                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];\r
+                                comboGroupName = barcodeName + "." + primerName;\r
                             }\r
                         }\r
                         \r
@@ -1965,33 +1937,16 @@ bool SffInfoCommand::readOligos(string oligoFile){
                         filehandles[itBar->second][itPrimer->second] = thisFilename;\r
                         temp.open(thisFilename.c_str(), ios::binary);          temp.close();\r
                     }\r
-                               }\r
-                       }\r
-               }\r
-               numFPrimers = primers.size();\r
-        numLinkers = linker.size();\r
-        numSpacers = spacer.size();\r
+                }\r
+            }\r
+        }\r
+        \r
         map<string, string> variables;\r
         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(currentFileName));\r
         variables["[group]"] = "scrap";\r
                noMatchFile = getOutputFileName("sff",variables);\r
         m->mothurRemove(noMatchFile);\r
         numNoMatch = 0;\r
-        \r
-        \r
-               bool allBlank = true;\r
-               for (int i = 0; i < barcodeNameVector.size(); i++) {\r
-                       if (barcodeNameVector[i] != "") {\r
-                               allBlank = false;\r
-                               break;\r
-                       }\r
-               }\r
-               for (int i = 0; i < primerNameVector.size(); i++) {\r
-                       if (primerNameVector[i] != "") {\r
-                               allBlank = false;\r
-                               break;\r
-                       }\r
-               }\r
                \r
         filehandlesHeaders.resize(filehandles.size());\r
         numSplitReads.resize(filehandles.size());\r
@@ -2023,7 +1978,6 @@ bool SffInfoCommand::readGroup(string oligoFile){
         filehandles.clear();\r
         numSplitReads.clear();\r
         filehandlesHeaders.clear();\r
-        barcodes.clear();\r
         \r
         groupMap = new GroupMap();\r
         groupMap->readMap(oligoFile);\r
@@ -2045,7 +1999,6 @@ bool SffInfoCommand::readGroup(string oligoFile){
                 ofstream temp;\r
                 m->openOutputFileBinary(thisFilename, temp); temp.close();\r
                 filehandles[i].push_back(thisFilename);\r
-                barcodes[groups[i]] = i;\r
             }\r
         }\r
         \r
index 12f0180db565edf8818455648eadcc19107ec5ab..b304e485c1270aba1b4b761dc9e7b66af54dcfcf 100644 (file)
@@ -12,6 +12,8 @@
 
 #include "command.hpp"
 #include "groupmap.h"
+#include "oligos.h"
+#include "trimoligos.h"
 
 /**********************************************************/
 
@@ -37,22 +39,20 @@ public:
 private:
        string sffFilename, sfftxtFilename, outputDir, accnosName, currentFileName, oligosfile, noMatchFile, groupfile;
        vector<string> filenames, outputNames, accnosFileNames, oligosFileNames, groupFileNames;
-       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup;
-       int mycount, split, numFPrimers, numLinkers, numSpacers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
+       bool abort, fasta, qual, trim, flow, sfftxt, hasAccnos, hasOligos, hasGroup, reorient, pairedOligos;
+       int mycount, split, numBarcodes, numFPrimers, numLinkers, numSpacers, numRPrimers, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, numNoMatch;
        set<string> seqNames;
-    map<string, int> barcodes;
-    map<string, int> primers;
     GroupMap* groupMap;
-    vector<string> linker, spacer, primerNameVector, barcodeNameVector, revPrimer;
     vector<vector<int> > numSplitReads;
     vector<vector<string> > filehandles;
     vector<vector<string> > filehandlesHeaders;
+    Oligos* oligosObject;
     
        //extract sff file functions
        int extractSffInfo(string, string, string);
        int readCommonHeader(ifstream&, CommonHeader&);
        int readHeader(ifstream&, Header&);
-       bool readSeqData(ifstream&, seqRead&, int, Header&);
+       bool readSeqData(ifstream&, seqRead&, int, Header&, TrimOligos*&, TrimOligos*&);
        int decodeName(string&, string&, string&, string);
     bool readOligos(string oligosFile);
     bool readGroup(string oligosFile);
@@ -67,7 +67,7 @@ private:
        int parseSffTxt();
        bool sanityCheck(Header&, seqRead&);
     int adjustCommonHeader(CommonHeader);
-    int findGroup(Header header, seqRead read, int& barcode, int& primer);
+    int findGroup(Header header, seqRead read, int& barcode, int& primer, TrimOligos*&, TrimOligos*&);
     int findGroup(Header header, seqRead read, int& barcode, int& primer, string);
     string reverseOligo(string oligo);
     
index cde795585c49a71e758c7fe331f02f9aac3e3964..d840dc8cfc59a62317600635fca3b601a7de2d5f 100644 (file)
@@ -18,6 +18,7 @@ vector<string> SRACommand::setParameters(){
         CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile-oligos", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile);
                CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq);
         CommandParameter pcontact("project", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pcontact);
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
         CommandParameter pmimark("mimark", "InputTypes", "", "", "none", "none", "none","xml",false,true,true); parameters.push_back(pmimark);
         //choose only one multiple options
         CommandParameter pplatform("platform", "Multiple", "_LS454-ILLUMINA-ION_TORRENT-PACBIO_SMRT", "_LS454", "", "", "","",false,false); parameters.push_back(pplatform);
@@ -51,7 +52,7 @@ string SRACommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n";
-               helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
+               helpString += "The sra command parameters are: sff, fastq, file, oligos, project, mimarksfile, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, checkorient, platform, orientation, libstrategy, datatype, libsource, libselection and instrument.\n";
         helpString += "The sff parameter is used to provide the original sff file.\n";
                helpString += "The fastq parameter is used to provide the original fastq file.\n";
         helpString += "The project parameter is used to provide your project file.\n";
@@ -63,6 +64,7 @@ string SRACommand::getHelpString(){
                helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
         helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
                helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
+        helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
         helpString += "The platform parameter is used to specify platform you are using choices are: _LS454,ILLUMINA,ION_TORRENT,PACBIO_SMRT. Default=_LS454. This is a controlled vocabulary section in the XML file that will be generated.\n";
         helpString += "The orientation parameter is used to specify sequence orientation. Choices are: forward and reverse. Default=forward. This is a controlled vocabulary section in the XML file that will be generated.\n";
         helpString += "The instrument parameter is used to specify instrument. Choices are 454_GS-454_GS_20-454_GS_FLX-454_GS_FLX_Titanium-454_GS_Junior-Illumina_Genome_Analyzer-Illumina_Genome_Analyzer_II-Illumina_Genome_Analyzer_IIx-Illumina_HiSeq_2000-Illumina_HiSeq_1000-Illumina_MiSeq-PacBio_RS-Ion_Torrent_PGM-unspecified. Default=454_GS. This is a controlled vocabulary section in the XML file that will be generated. \n";
@@ -134,7 +136,7 @@ SRACommand::SRACommand(string option)  {
             outputTypes["xml"] = tempOutNames;
                        
                        //if the user changes the input directory command factory will send this info to us in the output parameter
-                       string inputDir = validParameter.validFile(parameters, "inputdir", false);
+                       inputDir = validParameter.validFile(parameters, "inputdir", false);
                        if (inputDir == "not found"){   inputDir = "";          }
                        else {
             
@@ -284,6 +286,8 @@ SRACommand::SRACommand(string option)  {
                        m->mothurConvert(temp, tdiffs);
                        
                        if(tdiffs == 0){        tdiffs = bdiffs + pdiffs + ldiffs + sdiffs;     }
+            
+            checkorient = validParameter.validFile(parameters, "checkorient", false);          if (temp == "not found") { temp = "F"; }
                                
                }
                
@@ -816,12 +820,21 @@ int SRACommand::readFile(map<string, vector<string> >& files){
             
             if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ", thisFileName1 = " + thisFileName1 + ", thisFileName2 = " + thisFileName2  + ".\n"); }
             
+            if (inputDir != "") {
+                string path = m->hasPath(thisFileName1);
+                if (path == "") {  thisFileName1 = inputDir + thisFileName1;  }
+                
+                path = m->hasPath(thisFileName2);
+                if (path == "") {  thisFileName2 = inputDir + thisFileName2;  }
+            }
+            
             //check to make sure both are able to be opened
             ifstream in2;
             int openForward = m->openInputFile(thisFileName1, in2, "noerror");
             
             //if you can't open it, try default location
             if (openForward == 1) {
+                
                 if (m->getDefaultPath() != "") { //default path is set
                     string tryPath = m->getDefaultPath() + m->getSimpleName(thisFileName1);
                     m->mothurOut("Unable to open " + thisFileName1 + ". Trying default " + tryPath); m->mothurOutEndLine();
@@ -941,6 +954,7 @@ int SRACommand::parseSffFile(map<string, vector<string> >& files){
         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
         
         m->mothurOutEndLine();
         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
@@ -986,6 +1000,7 @@ int SRACommand::parseFastqFile(map<string, vector<string> >& files){
         if (ldiffs != 0) { commandString += ", ldiffs=" + toString(ldiffs); }
         if (sdiffs != 0) { commandString += ", sdiffs=" + toString(sdiffs); }
         if (tdiffs != 0) { commandString += ", tdiffs=" + toString(tdiffs); }
+        if (m->isTrue(checkorient)) { commandString += ", checkorient=" + checkorient; }
        
         m->mothurOutEndLine();
         m->mothurOut("/******************************************/"); m->mothurOutEndLine();
@@ -1072,201 +1087,40 @@ int SRACommand::checkGroups(map<string, vector<string> >& files){
 //***************************************************************************************************************
 int SRACommand::readOligos(){
        try {
-               ifstream inOligos;
-               m->openInputFile(oligosfile, inOligos);
-               
-               string type, oligo, roligo, group;
-        bool hasPrimer = false; bool hasPairedBarcodes = false; pairedOligos = false;
-        map<int, oligosPair> pairedBarcodes;
-        map<int, oligosPair> pairedPrimers;
-        map<string, int> barcodes;
-        map<string, int> primers;
-        vector<string>  linker;
-        vector<string>  spacer, revPrimer;
-               int indexPrimer = 0;
-               int indexBarcode = 0;
-        int indexPairedPrimer = 0;
-               int indexPairedBarcode = 0;
-        set<string> uniquePrimers;
-        set<string> uniqueBarcodes;
-               
-               while(!inOligos.eof()){
-            
-                       inOligos >> type;
-            
-                       if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
-            
-                       if(type[0] == '#'){
-                               while (!inOligos.eof()) {       char c = inOligos.get();  if (c == 10 || c == 13){      break;  }       } // get rest of line if there's any crap there
-                               m->gobble(inOligos);
-                       }
-                       else{
-                               m->gobble(inOligos);
-                               //make type case insensitive
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }
-                               
-                               inOligos >> oligo;
-                
-                if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
-                               
-                               for(int i=0;i<oligo.length();i++){
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                               
-                               if(type == "FORWARD"){
-                                       group = "";
-                                       
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                                       
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itPrime = primers.find(oligo);
-                                       if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                                       
-                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); }  }
-                    
-                                       primers[oligo] = indexPrimer; indexPrimer++;
-                                       primerNameVector.push_back(group);
-                               }
-                else if (type == "PRIMER"){
-                    m->gobble(inOligos);
-                                       
-                    inOligos >> roligo;
-                    
-                    for(int i=0;i<roligo.length();i++){
-                        roligo[i] = toupper(roligo[i]);
-                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
-                    }
-                    roligo = reverseOligo(roligo);
-                    
-                    group = "";
-                    
-                                       // get rest of line in case there is a primer name
-                                       while (!inOligos.eof()) {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       }
-                    
-                    oligosPair newPrimer(oligo, roligo);
-                    
-                    if (m->debug) { m->mothurOut("[DEBUG]: primer pair " + newPrimer.forward + " " + newPrimer.reverse + ", and group = " + group + ".\n"); }
-                                       
-                                       //check for repeat barcodes
-                    string tempPair = oligo+roligo;
-                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
-                    else { uniquePrimers.insert(tempPair); }
-                                       
-                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
-                    
-                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
-                                       primerNameVector.push_back(group);
-                    hasPrimer = true;
-                }
-                               else if(type == "REVERSE"){
-                                       //Sequence oligoRC("reverse", oligo);
-                                       //oligoRC.reverseComplement();
-                    string oligoRC = reverseOligo(oligo);
-                                       revPrimer.push_back(oligoRC);
-                               }
-                               else if(type == "BARCODE"){
-                                       inOligos >> group;
-                    
-                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
-                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
-                    
-                    string temp = "";
-                    while (!inOligos.eof())    {
-                                               char c = inOligos.get();
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  temp += c;  }
-                                       }
-                                       
-                    //then this is illumina data with 4 columns
-                    if (temp != "") {
-                        hasPairedBarcodes = true;
-                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
-                        group = temp;
-                        
-                        for(int i=0;i<reverseBarcode.length();i++){
-                            reverseBarcode[i] = toupper(reverseBarcode[i]);
-                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
-                        }
-                        
-                        reverseBarcode = reverseOligo(reverseBarcode);
-                        oligosPair newPair(oligo, reverseBarcode);
-                        
-                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
-                        //check for repeat barcodes
-                        string tempPair = oligo+reverseBarcode;
-                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
-                        else { uniqueBarcodes.insert(tempPair); }
-                        
-                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
-                        barcodeNameVector.push_back(group);
-                    }else {
-                        //check for repeat barcodes
-                        map<string, int>::iterator itBar = barcodes.find(oligo);
-                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                        
-                        barcodes[oligo]=indexBarcode; indexBarcode++;
-                        barcodeNameVector.push_back(group);
-                    }
-                               }else if(type == "LINKER"){
-                                       linker.push_back(oligo);
-                               }else if(type == "SPACER"){
-                                       spacer.push_back(oligo);
-                               }
-                               else{   m->mothurOut("[WARNING]: " + type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); }
-                       }
-                       m->gobble(inOligos);
-               }
-               inOligos.close();
-               
-        if (hasPairedBarcodes || hasPrimer) {
-            pairedOligos = true;
-            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
-        }
-               
+               Oligos oligos(oligosfile);
         
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-                       barcodeNameVector.push_back("");
-               }
-               
-               if(primerNameVector.size() == 0){
-                       primerNameVector.push_back("");
-               }
+        if (m->control_pressed) { return false; } //error in reading oligos
+        
+        if (oligos.hasPairedBarcodes())     {   pairedOligos = true;    }
+        else                                {  pairedOligos = false;    }
         
+        set<string> uniqueNames; //used to cleanup outputFileNames
         if (pairedOligos) {
-            for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
-                for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
+            map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+            map<int, oligosPair> primers = oligos.getPairedPrimers();
+            for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                     
-                    string primerName = primerNameVector[itPrimer->first];
-                    string barcodeName = barcodeNameVector[itBar->first];
+                    string primerName = oligos.getPrimerName(itPrimer->first);
+                    string barcodeName = oligos.getBarcodeName(itBar->first);
                     
                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
                     else {
                         string comboGroupName = "";
-                        string fastqFileName = "";
+                        string fastaFileName = "";
+                        string qualFileName = "";
+                        string nameFileName = "";
+                        string countFileName = "";
                         
                         if(primerName == ""){
-                            comboGroupName = barcodeNameVector[itBar->first];
-                        }
-                        else{
+                            comboGroupName = barcodeName;
+                        }else{
                             if(barcodeName == ""){
-                                comboGroupName = primerNameVector[itPrimer->first];
+                                comboGroupName = primerName;
                             }
                             else{
-                                comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                comboGroupName = barcodeName + "." + primerName;
                             }
                         }
                         uniqueNames.insert(comboGroupName);
@@ -1290,26 +1144,31 @@ int SRACommand::readOligos(){
                 }
             }
         }else {
+            map<string, int> barcodes = oligos.getBarcodes() ;
+            map<string, int> primers = oligos.getPrimers();
             for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
                 for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                     
-                    string primerName = primerNameVector[itPrimer->second];
-                    string barcodeName = barcodeNameVector[itBar->second];
+                    string primerName = oligos.getPrimerName(itPrimer->second);
+                    string barcodeName = oligos.getBarcodeName(itBar->second);
                     
                     if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                    else if ((primerName == "") && (barcodeName == "")) { } //do nothing
                     else {
                         string comboGroupName = "";
-                        string fastqFileName = "";
+                        string fastaFileName = "";
+                        string qualFileName = "";
+                        string nameFileName = "";
+                        string countFileName = "";
                         
                         if(primerName == ""){
-                            comboGroupName = barcodeNameVector[itBar->second];
-                        }
-                        else{
+                            comboGroupName = barcodeName;
+                        }else{
                             if(barcodeName == ""){
-                                comboGroupName = primerNameVector[itPrimer->second];
+                                comboGroupName = primerName;
                             }
                             else{
-                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                comboGroupName = barcodeName + "." + primerName;
                             }
                         }
                         uniqueNames.insert(comboGroupName);
@@ -1333,10 +1192,8 @@ int SRACommand::readOligos(){
                 }
             }
         }
-
-               
-        if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
         
+        if (m->debug) { int count = 0; for (set<string>::iterator it = uniqueNames.begin(); it != uniqueNames.end(); it++) { m->mothurOut("[DEBUG]: " + toString(count) + " groupName = " + *it + "\n"); count++; } }
         
                return true;
                
index 6c30bd9a4623dcc90dc693fef8e68a7c150ea3e5..dde656132ad57144f694dba1b043e66aed68ea2f 100644 (file)
@@ -11,6 +11,7 @@
 
 #include "command.hpp"
 #include "trimoligos.h"
+#include "oligos.h"
 
 /**************************************************************************************************/
 
@@ -37,12 +38,10 @@ private:
     bool abort, isSFF, pairedOligos;
     int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs;
     string sfffile, fastqfile, outputDir, file, oligosfile, contactfile, inputfile, mimarksfile;
-    string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType;
+    string libStrategy, libSource, libSelection, libLayout, platform, instrumentModel, fileType, dataType, checkorient;
     string submissionName, lastName, firstName, email, centerName, centerType, description, website, orientation, packageType;
-    string projectName, grantId, grantTitle, grantAgency, projectTitle;
+    string projectName, grantId, grantTitle, grantAgency, projectTitle, inputDir;
     vector<string> outputNames, Groups;
-    vector<string> primerNameVector;
-    vector<string> barcodeNameVector;
     map<string, vector<string> > Group2Barcode;
     map<string, vector<string> > Group2Primer;
     map<string, string> Group2Organism;
index 5800cde2146f108ae6efc3d1bce50b9bb9913750..ec724de4d85c3232ec574a1ead04714331b0220a 100644 (file)
@@ -16,6 +16,7 @@ vector<string> TrimFlowsCommand::setParameters(){
        try {
                CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","flow-file",false,true,true); parameters.push_back(pflow);
                CommandParameter poligos("oligos", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(poligos);
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "9", "", "", "","",false,false); parameters.push_back(pmaxhomop);
                CommandParameter pmaxflows("maxflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pmaxflows);
                CommandParameter pminflows("minflows", "Number", "", "450", "", "", "","",false,false); parameters.push_back(pminflows);
@@ -47,6 +48,14 @@ string TrimFlowsCommand::getHelpString(){
        try {
                string helpString = "";
                helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+        helpString += "The oligos parameter allows you to provide an oligos file.\n";
+        helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
+        helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n";
+        helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. The default is false.\n";
+               helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n";
+               helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n";
+        helpString += "The ldiffs parameter is used to specify the number of differences allowed in the linker. The default is 0.\n";
+               helpString += "The sdiffs parameter is used to specify the number of differences allowed in the spacer. The default is 0.\n";
         helpString += "The order parameter options are A, B or I.  Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
                helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
                helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
@@ -231,6 +240,9 @@ TrimFlowsCommand::TrimFlowsCommand(string option)  {
             
                        if(oligoFileName == "") {       allFiles = 0;           }
                        else                                    {       allFiles = 1;           }
+            
+            temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
+                       reorient = m->isTrue(temp);
 
                        numFPrimers = 0;
                        numRPrimers = 0;
@@ -416,7 +428,15 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                int count = 0;
                bool moreSeqs = 1;
                
-               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
+               TrimOligos* trimOligos = NULL;
+        if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
+        else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  }
+        
+        TrimOligos* rtrimOligos = NULL;
+        if (reorient) {
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
+        }
+
                
                while(moreSeqs) {
                                
@@ -430,7 +450,9 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        flowData.capFlows(maxFlows);    
                        
                        Sequence currSeq = flowData.getSequence();
-            //cout << currSeq.getName() << '\t' << currSeq.getUnaligned() << endl;
+            //for reorient
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
+            
                        if(!flowData.hasMinFlows(minFlows)){    //screen to see if sequence is of a minimum number of flows
                                success = 0;
                                trashCode += 'l';
@@ -444,7 +466,7 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        int barcodeIndex = 0;
                        
             if(numLinkers != 0){
-                success = trimOligos.stripLinker(currSeq);
+                success = trimOligos->stripLinker(currSeq);
                 if(success > ldiffs)           {       trashCode += 'k';       }
                 else{ currentSeqDiffs += success;  }
                 
@@ -452,21 +474,21 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
             
             if (m->debug) { m->mothurOut("[DEBUG]: " + currSeq.getName() + " " + currSeq.getUnaligned() + "\n"); }
             
-                       if(barcodes.size() != 0){
-                               success = trimOligos.stripBarcode(currSeq, barcodeIndex);
+                       if(numBarcodes != 0){
+                               success = trimOligos->stripBarcode(currSeq, barcodeIndex);
                                if(success > bdiffs)            {       trashCode += 'b';       }
                                else{ currentSeqDiffs += success;  }
                        }
                        
             if(numSpacers != 0){
-                success = trimOligos.stripSpacer(currSeq);
+                success = trimOligos->stripSpacer(currSeq);
                 if(success > sdiffs)           {       trashCode += 's';       }
                 else{ currentSeqDiffs += success;  }
                 
             }
             
                        if(numFPrimers != 0){
-                               success = trimOligos.stripForward(currSeq, primerIndex);
+                               success = trimOligos->stripForward(currSeq, primerIndex);
                                if(success > pdiffs)            {       trashCode += 'f';       }
                                else{ currentSeqDiffs += success;  }
                        }
@@ -474,24 +496,45 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                        if (currentSeqDiffs > tdiffs)   {       trashCode += 't';   }
                        
                        if(numRPrimers != 0){
-                               success = trimOligos.stripReverse(currSeq);
+                               success = trimOligos->stripReverse(currSeq);
                                if(!success)                            {       trashCode += 'r';       }
                        }
-                       
-                       if(trashCode.length() == 0){
-                string thisGroup = "";
-                if(barcodes.size() != 0){
-                    thisGroup = barcodeNameVector[barcodeIndex];
-                    if (primers.size() != 0) { 
-                        if (primerNameVector[primerIndex] != "") { 
-                            if(thisGroup != "") {
-                                thisGroup += "." + primerNameVector[primerIndex]; 
-                            }else {
-                                thisGroup = primerNameVector[primerIndex]; 
-                            }
-                        } 
-                    }
+            
+                       if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+                int thisSuccess = 0;
+                string thisTrashCode = "";
+                int thisCurrentSeqsDiffs = 0;
+                
+                int thisBarcodeIndex = 0;
+                int thisPrimerIndex = 0;
+                //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+                if(numBarcodes != 0){
+                    thisSuccess = rtrimOligos->stripBarcode(savedSeq, thisBarcodeIndex);
+                    if(thisSuccess > bdiffs)           { thisTrashCode += "b"; }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
                 }
+                //cout << currSeq.getName() << '\t' << savedSeq.getUnaligned() << endl;
+                if(numFPrimers != 0){
+                    thisSuccess = rtrimOligos->stripForward(savedSeq, thisPrimerIndex);
+                    if(thisSuccess > pdiffs)           { thisTrashCode += "f"; }
+                    else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                }
+                
+                if (thisCurrentSeqsDiffs > tdiffs)     {       thisTrashCode += 't';   }
+                
+                if (thisTrashCode == "") {
+                    trashCode = thisTrashCode;
+                    success = thisSuccess;
+                    currentSeqDiffs = thisCurrentSeqsDiffs;
+                    barcodeIndex = thisBarcodeIndex;
+                    primerIndex = thisPrimerIndex;
+                    savedSeq.reverseComplement();
+                    currSeq.setAligned(savedSeq.getAligned());
+                }else { trashCode += "(" + thisTrashCode + ")";  }
+            }
+
+                       if(trashCode.length() == 0){
+                string thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);
                 
                 int pos = thisGroup.find("ignore");
                 if (pos == string::npos) {             
@@ -534,6 +577,8 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
                scrapFlowFile.close();
                flowFile.close();
                if(fasta){      fastaFile.close();      }
+        delete trimOligos;
+        if (reorient) { delete rtrimOligos; }
                
                return count;
        }
@@ -545,199 +590,131 @@ int TrimFlowsCommand::driverCreateTrim(string flowFileName, string trimFlowFileN
 
 //***************************************************************************************************************
 
-void TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
+int TrimFlowsCommand::getOligos(vector<vector<string> >& outFlowFileNames){
        try {
-               ifstream oligosFile;
-               m->openInputFile(oligoFileName, oligosFile);
-               
-               string type, oligo, group;
-
-               int indexPrimer = 0;
-               int indexBarcode = 0;
-               
-               while(!oligosFile.eof()){
-               
-                       oligosFile >> type;     //get the first column value of the row - is it a comment or a feature we are interested in?
-            
-            if (m->debug) { m->mothurOut("[DEBUG]: type = " + type + ".\n"); }
-            
-                       if(type[0] == '#'){     //igore the line because there's a comment
-                               while (!oligosFile.eof())       {       char c = oligosFile.get(); if (c == 10 || c == 13){     break;  }       }
-                m->gobble(oligosFile);// get rest of line if there's any crap there
-                       }
-                       else{                           //there's a feature we're interested in
-                m->gobble(oligosFile);
-                               for(int i=0;i<type.length();i++){       type[i] = toupper(type[i]);  }                                  //make type case insensitive
-
-                               oligosFile >> oligo;    //get the DNA sequence for the feature
-
-                               for(int i=0;i<oligo.length();i++){      //make type case insensitive and change any U's to T's
-                                       oligo[i] = toupper(oligo[i]);
-                                       if(oligo[i] == 'U')     {       oligo[i] = 'T'; }
-                               }
-                
-                if (m->debug) { m->mothurOut("[DEBUG]: oligos = " + oligo + ".\n"); }
-                
-                               if(type == "FORWARD"){  //if the feature is a forward primer...
-                                       group = "";
-
-                                       while (!oligosFile.eof())       {       // get rest of line in case there is a primer name = will have the name of the primer
-                                               char c = oligosFile.get(); 
-                                               if (c == 10 || c == 13 || c == -1){     break;  }
-                                               else if (c == 32 || c == 9){;} //space or tab
-                                               else {  group += c;  }
-                                       } 
-
-                                       //have we seen this primer already?
-                                       map<string, int>::iterator itPrimer = primers.find(oligo);
-                                       if (itPrimer != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-
-                                       primers[oligo]=indexPrimer; indexPrimer++;
-                                       primerNameVector.push_back(group);
-
-                               }
-                               else if(type == "REVERSE"){
-                                       string oligoRC = reverseOligo(oligo);
-                                       revPrimer.push_back(oligoRC);
-                    if (m->debug) { m->mothurOut("[DEBUG]: reverse oligos = " + oligoRC + ".\n"); }
-                               }
-                               else if(type == "BARCODE"){
-                                       oligosFile >> group;
-
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itBar = barcodes.find(oligo);
-                                       if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
-                    
-                    if (m->debug) { m->mothurOut("[DEBUG]: group = " + group + ".\n"); }
-                    
-                                       barcodes[oligo]=indexBarcode; indexBarcode++;
-                                       barcodeNameVector.push_back(group);
-                               }else if(type == "LINKER"){
-                                       linker.push_back(oligo);
-                               }else if(type == "SPACER"){
-                                       spacer.push_back(oligo);
-                               }
-                               else{
-                                       m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine();  
-                               }
-                       }
-
-                       m->gobble(oligosFile);
-               }
-               oligosFile.close();
-               
-               if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
-               
-               //add in potential combos
-               if(barcodeNameVector.size() == 0){
-                       barcodes[""] = 0;
-                       barcodeNameVector.push_back("");                        
-               }
-               
-               if(primerNameVector.size() == 0){
-                       primers[""] = 0;
-                       primerNameVector.push_back("");                 
-               }
-
-               outFlowFileNames.resize(barcodeNameVector.size());
+        bool allBlank = false;
+        oligos.read(oligoFileName);
+        
+        if (m->control_pressed) { return 0; } //error in reading oligos
+        
+        if (oligos.hasPairedBarcodes()) {
+            pairedOligos = true;
+            numFPrimers = oligos.getPairedPrimers().size();
+            numBarcodes = oligos.getPairedBarcodes().size();
+        }else {
+            pairedOligos = false;
+            numFPrimers = oligos.getPrimers().size();
+            numBarcodes = oligos.getBarcodes().size();
+        }
+        
+        numLinkers = oligos.getLinkers().size();
+        numSpacers = oligos.getSpacers().size();
+        numRPrimers = oligos.getReversePrimers().size();
+        
+        vector<string> groupNames = oligos.getGroupNames();
+        if (groupNames.size() == 0) { allFiles = 0; allBlank = true;  }
+        
+        
+        outFlowFileNames.resize(oligos.getBarcodeNames().size());
                for(int i=0;i<outFlowFileNames.size();i++){
-                       outFlowFileNames[i].assign(primerNameVector.size(), "");
+            for(int j=0;j<oligos.getPrimerNames().size();j++){  outFlowFileNames[i].push_back(""); }
                }
-               
-               if(allFiles){
-
-                       for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
-                               for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
 
-                                       string primerName = primerNameVector[itPrimer->second];
-                                       string barcodeName = barcodeNameVector[itBar->second];
-                    
-                                       if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
-                                       else {                                  
-                        string comboGroupName = "";
-                        string fileName = "";
+        if (allFiles) {
+            set<string> uniqueNames; //used to cleanup outputFileNames
+            if (pairedOligos) {
+                map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+                map<int, oligosPair> primers = oligos.getPairedPrimers();
+                for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                         
-                        map<string, string> variables; 
-                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+                        string primerName = oligos.getPrimerName(itPrimer->first);
+                        string barcodeName = oligos.getBarcodeName(itBar->first);
                         
-                        if(primerName == ""){
-                            comboGroupName = barcodeNameVector[itBar->second];
-                            variables["[tag]"] = comboGroupName;
-                            fileName = getOutputFileName("flow", variables);
-                        }
-                        else{
-                            if(barcodeName == ""){
-                                comboGroupName = primerNameVector[itPrimer->second];
-                            }
-                            else{
-                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeName;
+                            }else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerName;
+                                }
+                                else{
+                                    comboGroupName = barcodeName + "." + primerName;
+                                }
                             }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
                             variables["[tag]"] = comboGroupName;
-                            fileName = getOutputFileName("flow", variables);
+                            string fileName = getOutputFileName("flow", variables);
+                            if (uniqueNames.count(fileName) == 0) {
+                                outputNames.push_back(fileName);
+                                outputTypes["flow"].push_back(fileName);
+                                uniqueNames.insert(fileName);
+                            }
+                            
+                            outFlowFileNames[itBar->first][itPrimer->first] = fileName;
+                            m->openOutputFile(fileName, temp);         temp.close();
                         }
+                    }
+                }
+            }else {
+                map<string, int> barcodes = oligos.getBarcodes() ;
+                map<string, int> primers = oligos.getPrimers();
+                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                         
-                        outFlowFileNames[itBar->second][itPrimer->second] = fileName;
+                        string primerName = oligos.getPrimerName(itPrimer->second);
+                        string barcodeName = oligos.getBarcodeName(itBar->second);
                         
-                        ofstream temp;
-                        m->openOutputFile(fileName, temp);
-                        temp.close();
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeName;
+                            }else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerName;
+                                }
+                                else{
+                                    comboGroupName = barcodeName + "." + primerName;
+                                }
+                            }
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+                            variables["[tag]"] = comboGroupName;
+                            string fileName = getOutputFileName("flow", variables);
+                            if (uniqueNames.count(fileName) == 0) {
+                                outputNames.push_back(fileName);
+                                outputTypes["flow"].push_back(fileName);
+                                uniqueNames.insert(fileName);
+                            }
+                            
+                            outFlowFileNames[itBar->second][itPrimer->second] = fileName;
+                            m->openOutputFile(fileName, temp);         temp.close();
+                        }
                     }
-                               }
-                       }
-               }
-               
-               numFPrimers = primers.size();
-               numRPrimers = revPrimer.size();
-        numLinkers = linker.size();
-        numSpacers = spacer.size();
-               
-       }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "getOligos");
-               exit(1);
-       }
-}
-//********************************************************************/
-string TrimFlowsCommand::reverseOligo(string oligo){
-       try {
-        string reverse = "";
-        
-        for(int i=oligo.length()-1;i>=0;i--){
-            
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
+                }
+            }
             
-            else                                               {       reverse += 'N'; }
         }
-        
-        
-        return reverse;
-    }
+               return 0;
+       }
        catch(exception& e) {
-               m->errorOut(e, "TrimFlowsCommand", "reverseOligo");
+               m->errorOut(e, "TrimFlowsCommand", "getOligos");
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 vector<unsigned long long> TrimFlowsCommand::getFlowFileBreaks() {
 
@@ -884,7 +861,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                //Windows version shared memory, so be careful when passing variables through the trimFlowData struct. 
                //Above fork() will clone, so memory is separate, but that's not the case with windows, 
                //////////////////////////////////////////////////////////////////////////////////////////////////////
-               
+               /*
                vector<trimFlowData*> pDataArray; 
                DWORD   dwThreadIdArray[processors-1];
                HANDLE  hThreadArray[processors-1]; 
@@ -958,7 +935,7 @@ int TrimFlowsCommand::createProcessesCreateTrim(string flowFileName, string trim
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];
                }
-               
+               */
                
 #endif 
                //append files
index f5493eb7517db72c20048da57080a587ae675f7e..58d3ed096eac4e9b3b733adaa31d10b37a068d87 100644 (file)
@@ -16,6 +16,7 @@
 #include "flowdata.h"
 #include "groupmap.h"
 #include "trimoligos.h"
+#include "oligos.h"
 
 class TrimFlowsCommand : public Command {
 public:
@@ -47,43 +48,29 @@ private:
        int comboStarts;
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
-
-       vector<unsigned long long> getFlowFileBreaks();
-       int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
-       int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
-    string reverseOligo(string);
-    
-       vector<string> outputNames;
+    vector<string> outputNames;
        set<string> filesToRemove;
-       
-       void getOligos(vector<vector<string> >&);               //a rewrite of what is in trimseqscommand.h
-       
-       bool allFiles;
+    bool allFiles;
        int processors;
-       int numFPrimers, numRPrimers;
+       int numFPrimers, numRPrimers, numBarcodes;
        int maxFlows, minFlows, minLength, maxLength, maxHomoP, tdiffs, bdiffs, pdiffs, sdiffs, ldiffs, numLinkers, numSpacers;
        int numFlows;
        float signal, noise;
-       bool fasta;
-       string flowOrder;       
-       
-       string flowFileName, oligoFileName, outputDir;
+       bool fasta, pairedOligos, reorient;
+       string flowOrder, flowFileName, oligoFileName, outputDir;
+    Oligos oligos;
 
-       map<string, int> barcodes;
-       map<string, int> primers;
-       vector<string> revPrimer;
-    vector<string> linker;
-    vector<string> spacer;
-
-       vector<string> primerNameVector;        //needed here?
-       vector<string> barcodeNameVector;       //needed here?
 
-       map<string, int> combos;                        //needed here?
-       map<string, int> groupToIndex;          //needed here?
+       vector<unsigned long long> getFlowFileBreaks();
+       int createProcessesCreateTrim(string, string, string, string, vector<vector<string> >); 
+       int driverCreateTrim(string, string, string, string, vector<vector<string> >, linePair*);
+       int getOligos(vector<vector<string> >&);                //a rewrite of what is in trimseqscommand.h
+       
+       
        
 };
 
-/**************************************************************************************************/
+/**************************************************************************************************
 //custom data structure for threads to use.
 // This is passed by void pointer so it can be any data type
 // that can be passed using a single void pointer (LPVOID).
@@ -133,7 +120,7 @@ struct trimFlowData {
        }
 };
 
-/**************************************************************************************************/
+/**************************************************************************************************
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
 #else
 static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){ 
@@ -260,6 +247,6 @@ static DWORD WINAPI MyTrimFlowThreadFunction(LPVOID lpParam){
        }
 } 
 #endif
-
+*/
 
 #endif
index 0f2a9cbb0e584ddfa393a53015b02b37ccfa3903..c7016d8b08a5804d1636a9ab8fde27b891f2a0a6 100644 (file)
@@ -800,6 +800,14 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                 
                 if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
                 else {
+                    if (m->debug) {
+                        string fMatches = "";
+                        for (int i = 0; i < minFGroup.size(); i++) { for (int j = 0; j < minFGroup[i].size(); j++) { fMatches += toString(minFGroup[i][j]) + " "; } }
+                        m->mothurOut("[DEBUG]: forward matches =  " + fMatches + "\n");
+                        string rMatches = "";
+                        for (int i = 0; i < minRGroup.size(); i++) { for (int j = 0; j < minRGroup[i].size(); j++) { rMatches += toString(minRGroup[i][j]) + " "; } }
+                        m->mothurOut("[DEBUG]: reverse matches =  " + rMatches + "\n");
+                    }
                     bool foundMatch = false;
                     vector<int> matches;
                     for (int i = 0; i < minFGroup.size(); i++) {
@@ -817,6 +825,9 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                         group = matches[0];
                         fStart = minFPos[0];
                         rStart = minRPos[0];
+                        if (m->debug) { m->mothurOut("[DEBUG]: found match = " + toString(matches[0]) + ".\n");  }
+                    }else {
+                        if (m->debug) { m->mothurOut("[DEBUG]: failed too many matches.\n");  }
                     }
                     
                     //we have an acceptable match for the forward and reverse, but do they match?
@@ -1065,6 +1076,222 @@ int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& grou
         exit(1);
     }
     
+}
+//*******************************************************************/
+int TrimOligos::stripPairedBarcode(Sequence& seq, int& group){
+    try {
+        //look for forward barcode
+        string rawSeq = seq.getUnaligned();
+        int success = bdiffs + 1;      //guilty until proven innocent
+        //cout << seq.getName() << endl;
+        //can you find the forward barcode
+        for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
+            string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+            //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+            if(rawSeq.length() < foligo.length()){     //let's just assume that the barcodes are the same length
+                success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+                break;
+            }
+            if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
+                success = bdiffs + 10; //if the sequence is shorter than the barcode then bail out
+                break;
+            }
+            
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
+            if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+                group = it->first;
+                string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+                seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+                success = 0;
+                break;
+            }
+        }
+        //cout << "success=" << success << endl;
+        //if you found the barcode or if you don't want to allow for diffs
+        if ((bdiffs == 0) || (success == 0)) { return success; }
+        else { //try aligning and see if you can find it
+            Alignment* alignment;
+            if (ifbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1)); }
+            else{ alignment = NULL; }
+            
+            //can you find the barcode
+            int minDiff = 1e6;
+            int minCount = 1;
+            vector< vector<int> > minFGroup;
+            vector<int> minFPos;
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+             1 Sarah Westcott
+             2 John Westcott
+             3 Anna Westcott
+             4 Sarah Schloss
+             5 Pat Schloss
+             6 Gail Brown
+             7 Pat Moore
+             only want to look for forward = Sarah, John, Anna, Pat, Gail
+             reverse = Westcott, Schloss, Brown, Moore
+             but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;
+            for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
+                string oligo = it->first;
+                
+                if(rawSeq.length() < maxFBarcodeLength){       //let's just assume that the barcodes are the same length
+                    success = bdiffs + 10;
+                    break;
+                }
+                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
+                oligo = alignment->getSeqAAln();
+                string temp = alignment->getSeqBAln();
+                
+                int alnLength = oligo.length();
+                
+                for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){     alnLength = i+1;        break;  } }
+                oligo = oligo.substr(0,alnLength);
+                temp = temp.substr(0,alnLength);
+                int numDiff = countDiffs(oligo, temp);
+                
+                if (alnLength == 0) { numDiff = bdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
+                if(numDiff < minDiff){
+                    minDiff = numDiff;
+                    minCount = 1;
+                    minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                    minFPos.clear();
+                    for(int i=0;i<alnLength;i++){
+                        if(temp[i] != '-'){
+                            tempminFPos++;
+                        }
+                    }
+                    minFPos.push_back(tempminFPos);
+                }else if(numDiff == minDiff){
+                    minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                    for(int i=0;i<alnLength;i++){
+                        if(temp[i] != '-'){
+                            tempminFPos++;
+                        }
+                    }
+                    minFPos.push_back(tempminFPos);
+                }
+            }
+            
+            //cout << minDiff << '\t' << minCount << '\t' << endl;
+            if(minDiff > bdiffs)       {       success = minDiff;      }       //no good matches
+            else{
+                //check for reverse match
+                if (alignment != NULL) { delete alignment; }
+                
+                if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1)); }
+                else{ alignment = NULL; }
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
+                
+                string rawRSequence = reverseOligo(seq.getUnaligned());
+                //cout << irbarcodes.size() << '\t' << maxRBarcodeLength << endl;
+                for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
+                    string oligo = reverseOligo(it->first);
+                    //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
+                    if(rawRSequence.length() < maxRBarcodeLength){     //let's just assume that the barcodes are the same length
+                        success = bdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = bdiffs + 100; }
+                    
+                    //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                    }else if(numDiff == minDiff){
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                        minRGroup.push_back(it->second);
+                    }
+                    
+                }
+                
+                if(minDiff > bdiffs)   {       success = minDiff;      }       //no good matches
+                else {
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) {
+                        foundMatch = true;
+                        group = matches[0];
+                        fStart = minFPos[0];
+                        rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only barcodes not a good sequence
+                    }
+                    
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (foundMatch) {
+                        string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+                        seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+                        success = minDiff;
+                        //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
+                    }else { success = bdiffs + 100;    }
+                }
+            }
+            
+            if (alignment != NULL) { delete alignment; }
+        }
+        
+        return success;
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimOligos", "stripPairedBarcode");
+        exit(1);
+    }
+    
 }
 //*******************************************************************/
 int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
@@ -1295,7 +1522,223 @@ int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& grou
     }
     
 }
-
+//*******************************************************************/
+int TrimOligos::stripPairedPrimers(Sequence& seq, int& group){
+    try {
+        //look for forward
+        string rawSeq = seq.getUnaligned();
+        int success = pdiffs + 1;      //guilty until proven innocent
+        //cout << seq.getName() << endl;
+        //can you find the forward
+        for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
+            string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            
+            //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+            //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+            if(rawSeq.length() < foligo.length()){     //let's just assume that the barcodes are the same length
+                success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+                break;
+            }
+            if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
+                success = pdiffs + 10; //if the sequence is shorter than the barcode then bail out
+                break;
+            }
+            
+            if (rawSeq.length() < (foligo.length() + roligo.length())) {  success = pdiffs + 10; break; }
+            
+            if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+                group = it->first;
+                string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+                seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+                success = 0;
+                break;
+            }
+        }
+        //cout << "success=" << success << endl;
+        //if you found the barcode or if you don't want to allow for diffs
+        if ((pdiffs == 0) || (success == 0)) { return success; }
+        else { //try aligning and see if you can find it
+            Alignment* alignment;
+            if (ifprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1)); }
+            else{ alignment = NULL; }
+            
+            //can you find the barcode
+            int minDiff = 1e6;
+            int minCount = 1;
+            vector< vector<int> > minFGroup;
+            vector<int> minFPos;
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+             1 Sarah Westcott
+             2 John Westcott
+             3 Anna Westcott
+             4 Sarah Schloss
+             5 Pat Schloss
+             6 Gail Brown
+             7 Pat Moore
+             only want to look for forward = Sarah, John, Anna, Pat, Gail
+             reverse = Westcott, Schloss, Brown, Moore
+             but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same. so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;
+            for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
+                string oligo = it->first;
+                
+                if(rawSeq.length() < maxFPrimerLength){        //let's just assume that the barcodes are the same length
+                    success = pdiffs + 10;
+                    break;
+                }
+                //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+pdiffs) << endl;
+                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
+                oligo = alignment->getSeqAAln();
+                string temp = alignment->getSeqBAln();
+                
+                int alnLength = oligo.length();
+                
+                for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){     alnLength = i+1;        break;  } }
+                oligo = oligo.substr(0,alnLength);
+                temp = temp.substr(0,alnLength);
+                int numDiff = countDiffs(oligo, temp);
+                
+                if (alnLength == 0) { numDiff = pdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
+                if(numDiff < minDiff){
+                    minDiff = numDiff;
+                    minCount = 1;
+                    minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                    minFPos.clear();
+                    for(int i=0;i<alnLength;i++){
+                        if(temp[i] != '-'){
+                            tempminFPos++;
+                        }
+                    }
+                    minFPos.push_back(tempminFPos);
+                }else if(numDiff == minDiff){
+                    minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                    for(int i=0;i<alnLength;i++){
+                        if(temp[i] != '-'){
+                            tempminFPos++;
+                        }
+                    }
+                    minFPos.push_back(tempminFPos);
+                }
+            }
+            
+            //cout << minDiff << '\t' << minCount << '\t' << endl;
+            if(minDiff > pdiffs)       {       success = minDiff;      }       //no good matches
+            else{
+                //check for reverse match
+                if (alignment != NULL) { delete alignment; }
+                
+                if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1)); }
+                else{ alignment = NULL; }
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
+                
+                string rawRSequence = reverseOligo(seq.getUnaligned());
+                
+                for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
+                    string oligo = reverseOligo(it->first);
+                    //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
+                    if(rawRSequence.length() < maxRPrimerLength){      //let's just assume that the barcodes are the same length
+                        success = pdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = pdiffs + 100; }
+                    
+                    //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                    }else if(numDiff == minDiff){
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                        minRGroup.push_back(it->second);
+                    }
+                    
+                }
+                
+                if(minDiff > pdiffs)   {       success = minDiff;      }       //no good matches
+                else {
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) {
+                        foundMatch = true;
+                        group = matches[0];
+                        fStart = minFPos[0];
+                        rStart = rawSeq.length() - minRPos[0];
+                        if (fStart > rStart) { foundMatch = false; } //only primers not a good sequence
+                    }
+                    
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (foundMatch) {
+                        string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+                        seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+                        success = minDiff;
+                        //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
+                    }else { success = pdiffs + 100;    }
+                }
+            }
+            
+            if (alignment != NULL) { delete alignment; }
+        }
+        
+        return success;
+        
+    }
+    catch(exception& e) {
+        m->errorOut(e, "TrimOligos", "stripPairedPrimers");
+        exit(1);
+    }
+    
+}
 //*******************************************************************/
 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
     try {
@@ -1733,6 +2176,8 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
 int TrimOligos::stripBarcode(Sequence& seq, int& group){
     try {
         
+        if (paired) { int success = stripPairedBarcode(seq, group); return success; }
+        
         string rawSequence = seq.getUnaligned();
         int success = bdiffs + 1;      //guilty until proven innocent
         
@@ -1836,6 +2281,8 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){
 int TrimOligos::stripForward(Sequence& seq, int& group){
     try {
         
+        if (paired) { int success = stripPairedPrimers(seq, group); return success; }
+        
         string rawSequence = seq.getUnaligned();
         int success = pdiffs + 1;      //guilty until proven innocent
         
index a86eb9ae1c815a8c71c4331934f88bf6b83f773f..5f4f9f06ba48032777e332c3267ae048937d9b48 100644 (file)
 #include "sequence.hpp"
 #include "qualityscores.h"
 
-struct oligosPair {
-       string forward;
-       string reverse;
-       
-       oligosPair() { forward = ""; reverse = "";  }
-       oligosPair(string f, string r) : forward(f), reverse(r) {}
-       ~oligosPair() {}
-};
 
 class TrimOligos {
        
@@ -82,6 +74,9 @@ class TrimOligos {
         
         int stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group);
         int stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool);
+        int stripPairedBarcode(Sequence& seq,  int& group);
+        int stripPairedPrimers(Sequence& seq,  int& group);
+
 };
 
 #endif
index de607c62f40b7e95083e3058aae53636b260503a..060733036297fb2bd023a2783774218e2f513e80 100644 (file)
@@ -12,6 +12,7 @@
 #include "trimoligos.h"
 
 
+
 //**********************************************************************************************************************
 vector<string> TrimSeqsCommand::setParameters(){       
        try {
@@ -65,7 +66,7 @@ string TrimSeqsCommand::getHelpString(){
                helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast, logtransform and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
-        helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
+       helpString += "The checkorient parameter will check look for the reverse compliment of the barcode or primer in the sequence. If found the sequence is flipped. The default is false.\n";
                helpString += "The oligos parameter allows you to provide an oligos file.\n";
                helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
@@ -385,6 +386,7 @@ int TrimSeqsCommand::execute(){
                vector<vector<string> > fastaFileNames;
                vector<vector<string> > qualFileNames;
                vector<vector<string> > nameFileNames;
+        map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
                
         map<string, string> variables; 
                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
@@ -440,7 +442,7 @@ int TrimSeqsCommand::execute(){
                
                string outputGroupFileName;
                if(oligoFile != ""){
-                       createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames);
+                       createGroup = getOligos(fastaFileNames, qualFileNames, nameFileNames, uniqueFastaNames);
                        if ((createGroup) && (countfile == "")){
                 map<string, string> myvariables; 
                 myvariables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
@@ -464,7 +466,6 @@ int TrimSeqsCommand::execute(){
                if (m->control_pressed) {  return 0; }                  
        
                if(allFiles){
-                       map<string, string> uniqueFastaNames;// so we don't add the same groupfile multiple times
                        map<string, string>::iterator it;
                        set<string> namesToRemove;
                        for(int i=0;i<fastaFileNames.size();i++){
@@ -484,11 +485,7 @@ int TrimSeqsCommand::execute(){
                                                                        m->mothurRemove(nameFileNames[i][j]);
                                                                        namesToRemove.insert(nameFileNames[i][j]);
                                                                }
-                                                       }else{  
-                                                               it = uniqueFastaNames.find(fastaFileNames[i][j]);
-                                                               if (it == uniqueFastaNames.end()) {     
-                                                                       uniqueFastaNames[fastaFileNames[i][j]] = barcodeNameVector[i];  
-                                                               }       
+                                uniqueFastaNames.erase(fastaFileNames[i][j]); //remove from list for group file print
                                                        }
                                                }
                                        }
@@ -679,40 +676,13 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                int count = 0;
                bool moreSeqs = 1;
-        int numBarcodes = barcodes.size();
                TrimOligos* trimOligos = NULL;
-        if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
-        else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
+        if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes()); }
+        else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  }
         
         TrimOligos* rtrimOligos = NULL;
         if (reorient) {
-            //create reoriented primer and barcode pairs
-            map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
-            for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
-                  oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
-                rpairedPrimers[it->first] = tempPair;
-                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
-            }
-            for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
-                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
-                rpairedBarcodes[it->first] = tempPair;
-                 //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
-            }
-            int index = rpairedBarcodes.size();
-            for (map<string, int>::iterator it = barcodes.begin(); it != barcodes.end(); it++) {
-                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
-                rpairedBarcodes[index] = tempPair; index++;
-                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << barcodeNameVector[it->first] << endl;
-            }
-            
-            index = rpairedPrimers.size();
-            for (map<string, int>::iterator it = primers.begin(); it != primers.end(); it++) {
-                oligosPair tempPair("", reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
-                rpairedPrimers[index] = tempPair; index++;
-                //cout  << reverseOligo((it->second).reverse) << '\t' << (reverseOligo((it->second).forward)) << '\t' << primerNameVector[it->first] << endl;
-            }
-
-            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size(); 
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
         }
         
                while (moreSeqs) {
@@ -871,20 +841,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                 
                                if(trashCode.length() == 0){
                     string thisGroup = "";
-                    if (createGroup) {
-                                               if(numBarcodes != 0){
-                                                       thisGroup = barcodeNameVector[barcodeIndex];
-                                                       if (numFPrimers != 0) {
-                                                               if (primerNameVector[primerIndex] != "") { 
-                                                                       if(thisGroup != "") {
-                                                                               thisGroup += "." + primerNameVector[primerIndex]; 
-                                                                       }else {
-                                                                               thisGroup = primerNameVector[primerIndex]; 
-                                                                       }
-                                                               } 
-                                                       }
-                        }
-                    }
+                    if (createGroup) {  thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);    }
                     
                     int pos = thisGroup.find("ignore");
                     if (pos == string::npos) {
@@ -1189,8 +1146,8 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               tempPrimerQualFileNames,
                                               tempNameFileNames,
                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
-                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
-                                             primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
+                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, oligoFile,
+                                              createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage, logtransform, 
                                              minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
@@ -1501,9 +1458,201 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
 
 //***************************************************************************************************************
 
-bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames){
+bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<vector<string> >& qualFileNames, vector<vector<string> >& nameFileNames, map<string, string>& fastaFile2Group){
        try {
-               ifstream inOligos;
+        
+        bool allBlank = false;
+        oligos.read(oligoFile);
+        
+        if (m->control_pressed) { return false; } //error in reading oligos
+        
+        if (oligos.hasPairedBarcodes()) {
+            pairedOligos = true;
+            numFPrimers = oligos.getPairedPrimers().size();
+            numBarcodes = oligos.getPairedBarcodes().size();
+        }else {
+            pairedOligos = false;
+            numFPrimers = oligos.getPrimers().size();
+            numBarcodes = oligos.getBarcodes().size();
+        }
+        
+        numLinkers = oligos.getLinkers().size();
+        numSpacers = oligos.getSpacers().size();
+        numRPrimers = oligos.getReversePrimers().size();
+        
+        vector<string> groupNames = oligos.getGroupNames();
+        if (groupNames.size() == 0) { allFiles = 0; allBlank = true;  }
+        
+        
+        fastaFileNames.resize(oligos.getBarcodeNames().size());
+               for(int i=0;i<fastaFileNames.size();i++){
+            for(int j=0;j<oligos.getPrimerNames().size();j++){  fastaFileNames[i].push_back(""); }
+               }
+        
+               if(qFileName != "")     {       qualFileNames = fastaFileNames; }
+               if(nameFile != "")      {       nameFileNames = fastaFileNames; }
+        
+        
+        if (allFiles) {
+            set<string> uniqueNames; //used to cleanup outputFileNames
+            if (pairedOligos) {
+                map<int, oligosPair> barcodes = oligos.getPairedBarcodes();
+                map<int, oligosPair> primers = oligos.getPairedPrimers();
+                for(map<int, oligosPair>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<int, oligosPair>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                        
+                        string primerName = oligos.getPrimerName(itPrimer->first);
+                        string barcodeName = oligos.getBarcodeName(itBar->first);
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastaFileName = "";
+                            string qualFileName = "";
+                            string nameFileName = "";
+                            string countFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeName;
+                            }else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerName;
+                                }
+                                else{
+                                    comboGroupName = barcodeName + "." + primerName;
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                            variables["[tag]"] = comboGroupName;
+                            fastaFileName = getOutputFileName("fasta", variables);
+                            if (uniqueNames.count(fastaFileName) == 0) {
+                                outputNames.push_back(fastaFileName);
+                                outputTypes["fasta"].push_back(fastaFileName);
+                                uniqueNames.insert(fastaFileName);
+                                fastaFile2Group[fastaFileName] = comboGroupName;
+                            }
+                            
+                            fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
+                            m->openOutputFile(fastaFileName, temp);            temp.close();
+                            
+                            if(qFileName != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+                                qualFileName = getOutputFileName("qfile", variables);
+                                if (uniqueNames.count(qualFileName) == 0) {
+                                    outputNames.push_back(qualFileName);
+                                    outputTypes["qfile"].push_back(qualFileName);
+                                }
+                                
+                                qualFileNames[itBar->first][itPrimer->first] = qualFileName;
+                                m->openOutputFile(qualFileName, temp);         temp.close();
+                            }
+                            
+                            if(nameFile != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                                nameFileName = getOutputFileName("name", variables);
+                                if (uniqueNames.count(nameFileName) == 0) {
+                                    outputNames.push_back(nameFileName);
+                                    outputTypes["name"].push_back(nameFileName);
+                                }
+                                
+                                nameFileNames[itBar->first][itPrimer->first] = nameFileName;
+                                m->openOutputFile(nameFileName, temp);         temp.close();
+                            }
+                        }
+                    }
+                }
+            }else {
+                map<string, int> barcodes = oligos.getBarcodes() ;
+                map<string, int> primers = oligos.getPrimers();
+                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
+                        
+                        string primerName = oligos.getPrimerName(itPrimer->second);
+                        string barcodeName = oligos.getBarcodeName(itBar->second);
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else if ((primerName == "") && (barcodeName == "")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastaFileName = "";
+                            string qualFileName = "";
+                            string nameFileName = "";
+                            string countFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeName;
+                            }else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerName;
+                                }
+                                else{
+                                    comboGroupName = barcodeName + "." + primerName;
+                                }
+                            }
+                            
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                            variables["[tag]"] = comboGroupName;
+                            fastaFileName = getOutputFileName("fasta", variables);
+                            if (uniqueNames.count(fastaFileName) == 0) {
+                                outputNames.push_back(fastaFileName);
+                                outputTypes["fasta"].push_back(fastaFileName);
+                                uniqueNames.insert(fastaFileName);
+                                fastaFile2Group[fastaFileName] = comboGroupName;
+                            }
+                           
+                            fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
+                            m->openOutputFile(fastaFileName, temp);            temp.close();
+                            
+                            if(qFileName != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+                                qualFileName = getOutputFileName("qfile", variables);
+                                if (uniqueNames.count(qualFileName) == 0) {
+                                    outputNames.push_back(qualFileName);
+                                    outputTypes["qfile"].push_back(qualFileName);
+                                }
+                                
+                                qualFileNames[itBar->second][itPrimer->second] = qualFileName;
+                                m->openOutputFile(qualFileName, temp);         temp.close();
+                            }
+                           
+                            if(nameFile != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                                nameFileName = getOutputFileName("name", variables);
+                                if (uniqueNames.count(nameFileName) == 0) {
+                                    outputNames.push_back(nameFileName);
+                                    outputTypes["name"].push_back(nameFileName);
+                                }
+                                
+                                nameFileNames[itBar->second][itPrimer->second] = nameFileName;
+                                m->openOutputFile(nameFileName, temp);         temp.close();
+                            }
+                        }
+                    }
+                }
+            }
+
+        }
+    
+    
+    
+    if (allBlank) {
+        m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
+        allFiles = false;
+        return false;
+    }
+    
+
+    
+               /*ifstream inOligos;
                m->openInputFile(oligoFile, inOligos);
                
                ofstream test;
@@ -1520,7 +1669,7 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                
                while(!inOligos.eof()){
 
-                       inOligos >> type; 
+                       inOligos >> type;
             
                        if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }      
             
@@ -1825,33 +1974,8 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                 }
             }
                }
-               numFPrimers = primers.size();
-        if (pairedOligos) { numFPrimers  = pairedPrimers.size(); }
-               numRPrimers = revPrimer.size();
-        numLinkers = linker.size();
-        numSpacers = spacer.size();
-               
-               bool allBlank = true;
-               for (int i = 0; i < barcodeNameVector.size(); i++) {
-                       if (barcodeNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-               for (int i = 0; i < primerNameVector.size(); i++) {
-                       if (primerNameVector[i] != "") {
-                               allBlank = false;
-                               break;
-                       }
-               }
-
-               if (allBlank) {
-                       m->mothurOut("[WARNING]: your oligos file does not contain any group names.  mothur will not create a groupfile."); m->mothurOutEndLine();
-                       allFiles = false;
-                       return false;
-               }
-               
-               return true;
+         */
+                               return true;
                
        }
        catch(exception& e) {
@@ -1951,46 +2075,6 @@ bool TrimSeqsCommand::cullHomoP(Sequence& seq){
        }
        
 }
-//********************************************************************/
-string TrimSeqsCommand::reverseOligo(string oligo){
-       try {
-        string reverse = "";
-        
-        for(int i=oligo.length()-1;i>=0;i--){
-            
-            if(oligo[i] == 'A')                {       reverse += 'T'; }
-            else if(oligo[i] == 'T'){  reverse += 'A'; }
-            else if(oligo[i] == 'U'){  reverse += 'A'; }
-            
-            else if(oligo[i] == 'G'){  reverse += 'C'; }
-            else if(oligo[i] == 'C'){  reverse += 'G'; }
-            
-            else if(oligo[i] == 'R'){  reverse += 'Y'; }
-            else if(oligo[i] == 'Y'){  reverse += 'R'; }
-            
-            else if(oligo[i] == 'M'){  reverse += 'K'; }
-            else if(oligo[i] == 'K'){  reverse += 'M'; }
-            
-            else if(oligo[i] == 'W'){  reverse += 'W'; }
-            else if(oligo[i] == 'S'){  reverse += 'S'; }
-            
-            else if(oligo[i] == 'B'){  reverse += 'V'; }
-            else if(oligo[i] == 'V'){  reverse += 'B'; }
-            
-            else if(oligo[i] == 'D'){  reverse += 'H'; }
-            else if(oligo[i] == 'H'){  reverse += 'D'; }
-            
-            else                                               {       reverse += 'N'; }
-        }
-        
-        
-        return reverse;
-    }
-       catch(exception& e) {
-               m->errorOut(e, "TrimSeqsCommand", "reverseOligo");
-               exit(1);
-       }
-}
 
 //***************************************************************************************************************
 
index 14eed17e236a333583d461d61482a1d541c6a9cf..2448669816c6936f55bff8b3ad256ce15f1de474 100644 (file)
@@ -16,6 +16,7 @@
 #include "qualityscores.h"
 #include "trimoligos.h"
 #include "counttable.h"
+#include "oligos.h"
 
 
 class TrimSeqsCommand : public Command {
@@ -44,34 +45,22 @@ private:
         linePair() {}
     };
     
-       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
+    Oligos oligos;
+       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&, map<string, string>&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
        bool cullLength(Sequence&);
        bool cullHomoP(Sequence&);
        bool cullAmbigs(Sequence&);
-    string reverseOligo(string);
-
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
        bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
+       int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
-       vector<string> revPrimer, outputNames;
        set<string> filesToRemove;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
-       map<string, int> barcodes;
-       vector<string> groupVector;
-       map<string, int> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       map<string, int> groupToIndex;
-       vector<string> primerNameVector;        //needed here?
-       vector<string> barcodeNameVector;       //needed here?
+    vector<string> groupVector,outputNames;
        map<string, int> groupCounts;  
        map<string, string> nameMap;
     map<string, int> nameCount; //for countfile name -> repCount
@@ -93,34 +82,23 @@ private:
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
+    string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient, logtransform;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+    bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform;
+       int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
        int qWindowSize, qWindowStep, keepFirst, removeLast, count;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
-    vector<string> revPrimer;
-       map<string, int> barcodes;
-       map<string, int> primers;
     map<string, int> nameCount;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
     map<string, string> groupMap;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
     
        trimData(){}
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
-                      vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
+                      int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
@@ -145,22 +123,13 @@ struct trimData {
         qlineEnd = qend;
                m = mout;
         nameCount = ncount;
+        oligosfile = o;
         
         pdiffs = pd;
         bdiffs = bd;
         ldiffs = ld;
         sdiffs = sd;
         tdiffs = td;
-        barcodes = bar;
-        pairedPrimers = ppr;
-        pairedBarcodes = pbr;
-        pairedOligos = po;
-        primers = pri;      numFPrimers = primers.size();
-        revPrimer = revP;   numRPrimers = revPrimer.size();
-        linker = li;        numLinkers = linker.size();
-        spacer = spa;       numSpacers = spacer.size();
-        primerNameVector = priNameVector;
-        barcodeNameVector = barNameVector;
         
         createGroup = cGroup;
         allFiles = aFiles;
@@ -260,37 +229,30 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
             } 
                }
                
+        int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers;
+        bool pairedOligos;
+        Oligos oligos(pDataArray->oligosfile);
+        if (oligos.hasPairedBarcodes()) {
+            pairedOligos = true;
+            numFPrimers = oligos.getPairedPrimers().size();
+            numBarcodes = oligos.getPairedBarcodes().size();
+        }else {
+            pairedOligos = false;
+            numFPrimers = oligos.getPrimers().size();
+            numBarcodes = oligos.getBarcodes().size();
+        }
+        
+        numLinkers = oligos.getLinkers().size();
+        numSpacers = oligos.getSpacers().size();
+        numRPrimers = oligos.getReversePrimers().size();
+        
         TrimOligos* trimOligos = NULL;
-        int numBarcodes = pDataArray->barcodes.size();
-        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); pDataArray->numFPrimers = pDataArray->pairedPrimers.size(); }
-        else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
+        if (pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes());    }
+        else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  }
         
         TrimOligos* rtrimOligos = NULL;
         if (pDataArray->reorient) {
-            //create reoriented primer and barcode pairs
-            map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
-            for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
-                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
-                rpairedPrimers[it->first] = tempPair;
-            }
-            for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
-                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
-                rpairedBarcodes[it->first] = tempPair;
-            }
-            
-            int index = rpairedBarcodes.size();
-            for (map<string, int>::iterator it = pDataArray->barcodes.begin(); it != pDataArray->barcodes.end(); it++) {
-                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
-                rpairedBarcodes[index] = tempPair; index++;
-            }
-            
-            index = rpairedPrimers.size();
-            for (map<string, int>::iterator it = pDataArray->primers.begin(); it != pDataArray->primers.end(); it++) {
-                oligosPair tempPair("", trimOligos->reverseOligo((it->first))); //reverseBarcode, rc ForwardBarcode
-                rpairedPrimers[index] = tempPair; index++;
-            }
-
-            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0,  oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
         }
         
                pDataArray->count = 0;
@@ -329,7 +291,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                int barcodeIndex = 0;
                                int primerIndex = 0;
                                
-                if(pDataArray->numLinkers != 0){
+                if(numLinkers != 0){
                                        success = trimOligos->stripLinker(currSeq, currQual);
                                        if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
                                        else{ currentSeqsDiffs += success;  }
@@ -341,14 +303,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
-                if(pDataArray->numSpacers != 0){
+                if(numSpacers != 0){
                                        success = trimOligos->stripSpacer(currSeq, currQual);
                                        if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
                                        else{ currentSeqsDiffs += success;  }
 
                                }
                 
-                               if(pDataArray->numFPrimers != 0){
+                               if(numFPrimers != 0){
                                        success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
                                        if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
@@ -356,7 +318,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                
                                if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
                                
-                               if(pDataArray->numRPrimers != 0){
+                               if(numRPrimers != 0){
                                        success = trimOligos->stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
@@ -375,7 +337,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
                     
-                    if(pDataArray->numFPrimers != 0){
+                    if(numFPrimers != 0){
                         thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
                         if(thisSuccess > pDataArray->pdiffs)           {       thisTrashCode += 'f';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
@@ -478,20 +440,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                
                                if(trashCode.length() == 0){
                     string thisGroup = "";
-                    if (pDataArray->createGroup) {
-                                               if(numBarcodes != 0){
-                                                       thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
-                                                       if (pDataArray->numFPrimers != 0) {
-                                                               if (pDataArray->primerNameVector[primerIndex] != "") { 
-                                                                       if(thisGroup != "") {
-                                                                               thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
-                                                                       }else {
-                                                                               thisGroup = pDataArray->primerNameVector[primerIndex]; 
-                                                                       }
-                                                               } 
-                                                       }
-                        }
-                    }
+                    if (pDataArray->createGroup) {    thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);                     }
                     
                     int pos = thisGroup.find("ignore");
                     if (pos == string::npos) {