]> git.donarmstrong.com Git - mothur.git/commitdiff
mods to resolve some misc warnings. added alignPrimer function to needle man to allow...
authorSarahsWork <sarahswork@imac.westcotts.net>
Wed, 27 Feb 2013 20:08:18 +0000 (15:08 -0500)
committerSarahsWork <sarahswork@imac.westcotts.net>
Wed, 27 Feb 2013 20:08:18 +0000 (15:08 -0500)
19 files changed:
abstractrandomforest.cpp
alignment.hpp
alignmentdb.cpp
cluster.hpp
consensusseqscommand.cpp
kmertree.cpp
kruskalwalliscommand.cpp
libshuff.h
mothurout.cpp
needlemanoverlap.cpp
needlemanoverlap.hpp
preclustercommand.h
regularizedrandomforest.cpp
shhhercommand.cpp
trialSwap2.cpp
trimoligos.cpp
trimoligos.h
trimseqscommand.cpp
trimseqscommand.h

index ae60b773742c25e0af85f811167003641e4646a4..fc3b429cdd201cc1fb9a583bc69b2cdc5dc00f90 100644 (file)
@@ -55,4 +55,4 @@ vector<int> AbstractRandomForest::getGlobalDiscardedFeatureIndices() {
        } 
 }
 
-/***********************************************************************/
\ No newline at end of file
+/***********************************************************************/
index 9d2197f5eaa7248b9bb8490c87c804f85931d095..3a2e84de5a8e24cf744ef3544d44df9e3e5dc722 100644 (file)
@@ -25,6 +25,7 @@ public:
        Alignment();
        virtual ~Alignment();
        virtual void align(string, string) = 0;
+    virtual void alignPrimer(string, string) {}
        
        
 //     float getAlignmentScore();
index 4ce76ea487d4c34f9b9a8f5f56658a2352ff7721..9fec737778e01704e98f5c813e3c0554e7ab0981 100644 (file)
@@ -47,7 +47,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap
                        int start = time(NULL);
                        m->mothurOutEndLine();
                        m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t");   cout.flush();
-                       bool aligned = false;
+                       //bool aligned = false;
             int tempLength = 0;
             
                        #ifdef USE_MPI  
index ff5d41d38c71cfea8dd9d434f7a4e466e0cd160c..dff55e61fcf7b571da5c0b895a5f46910f58f8c6 100644 (file)
@@ -14,6 +14,7 @@ class Cluster {
        
 public:
        Cluster(RAbundVector*, ListVector*, SparseDistanceMatrix*, float, string);
+    virtual ~Cluster() {}
     virtual void update(double&);                              
        virtual string getTag() = 0;
        virtual void setMapWanted(bool m);  
@@ -102,4 +103,4 @@ private:
 
 
 
-#endif
\ No newline at end of file
+#endif
index 9bfdc9caba1e6bef2fe14d6eb469af2218bd29d8..1b1e7d20d37603610945f6dadbf6dd32399ed8d5 100644 (file)
@@ -312,7 +312,7 @@ int ConsensusSeqsCommand::execute(){
                
                }else {
                        
-                                               
+            
                        InputData* input = new InputData(listfile, "list");
                        ListVector* list = input->getListVector();
                        
@@ -435,15 +435,24 @@ int ConsensusSeqsCommand::processList(ListVector*& list){
                
                outSummary << "OTU#\tPositioninAlignment\tA\tT\tG\tC\tGap\tNumberofSeqs\tConsensusBase" << endl;
                
+        string snumBins = toString(list->getNumBins());
                for (int i = 0; i < list->getNumBins(); i++) {
                        
                        if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
                        
                        string bin = list->get(i);
                        string consSeq = getConsSeq(bin, outSummary, i);
+            
+            string seqName = "Otu";
+            string sbinNumber = toString(i+1);
+            if (sbinNumber.length() < snumBins.length()) {
+                int diff = snumBins.length() - sbinNumber.length();
+                for (int h = 0; h < diff; h++) { seqName += "0"; }
+            }
+            seqName += sbinNumber;
                        
-                       outFasta << ">seq" << (i+1) << endl << consSeq << endl;
-                       outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << bin << endl;
+                       outFasta << ">" << seqName << endl << consSeq << endl;
+                       outName << seqName << '\t' << seqName << "," << bin << endl;
                }
                
                outSummary.close(); outName.close(); outFasta.close();
index fbf2bfb9d458791320ca4dccc58f333623fa154e..bc23ff6600fc82fd200b79f1d3d1dd15b453848b 100755 (executable)
@@ -357,7 +357,7 @@ string KmerTree::getTaxonomy(Sequence* thisSeq){
                     //                 levelProbabilityOutput << tree[indices[i][maxIndex[i]]]->getName() << '(' << setprecision(6) << pLevel_X[i] << ");";
                 }
                 else{
-                    taxonProbabilityString += "unclassified" + '(' + toString(confidenceScore) + ");";
+                    taxonProbabilityString += "unclassified(" + toString(confidenceScore) + ");";
                     //                 levelProbabilityOutput << "unclassified" << '(' << setprecision(6) << pLevel_X[i] << ");";
                     simpleTax += "unclassified;";
                 }
index b321f133a96107023598b71d426506b5cf2f933c..bf4fafa950f2f180227f61ab41e1377008927c82 100644 (file)
@@ -259,4 +259,5 @@ void KruskalWallisCommand::assignValue(vector<groupRank> &vec) {
 }
 //**********************************************************************************************************************
 //**********************************************************************************************************************
-//**********************************************************************************************************************
\ No newline at end of file
+//**********************************************************************************************************************
+
index ef0e1b0655cfd568a9a45dad013b96ea979dcc0f..4275ff78e93a4728e85c85d0cd1eca3bfa8e81d0 100644 (file)
@@ -16,6 +16,7 @@ class Libshuff {
        
 public:
        Libshuff(FullMatrix*, int, float, float);
+    virtual ~Libshuff() {}
        virtual vector<vector<double> > evaluateAll() = 0;
        virtual float evaluatePair(int,int) = 0;
        void randomizeGroups(int, int);
index 9f678fef98505cae0de916b1db66914d5f41074f..f7b0d86bf93ff1c3f1ff6ec4ad62d21d66fc382b 100644 (file)
@@ -452,6 +452,8 @@ void MothurOut::errorOut(exception& e, string object, string function) {
     }else { //bad alloc
         if (object == "cluster"){
             mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  There are two common causes for this, file size and format.\n\nFile Size:\nThe cluster command loads your distance matrix into RAM, and your distance file is most likely too large to fit in RAM. There are two options to help with this. The first is to use a cutoff. By using a cutoff mothur will only load distances that are below the cutoff. If that is still not enough, there is a command called cluster.split, http://www.mothur.org/wiki/cluster.split which divides the distance matrix, and clusters the smaller pieces separately. You may also be able to reduce the size of the original distance matrix by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. \n\nWrong Format:\nThis error can be caused by trying to read a column formatted distance matrix using the phylip parameter. By default, the dist.seqs command generates a column formatted distance matrix. To make a phylip formatted matrix set the dist.seqs command parameter output to lt.  \n\nIf you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
+        }else if (object == "shhh.flows"){
+                mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory. The shhh.flows command is very memory intensive. This error is most commonly caused by trying to process a dataset too large, using multiple processors, or failing to run trim.flows before shhh.flows. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Running trim.flows with an oligos file, and then shhh.flows with the file option may also resolve the issue. If for some reason you are unable to run shhh.flows with your data, a good alternative is to use the trim.seqs command using a 50-bp sliding window and to trim the sequence when the average quality score over that window drops below 35. Our results suggest that the sequencing error rates by this method are very good, but not quite as good as by shhh.flows and that the resulting sequences tend to be a bit shorter. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry. ");
         }else {
             mothurOut(" has occurred in the " + object + " class function " + function + ". This error indicates your computer is running out of memory.  This is most commonly caused by trying to process a dataset too large, using multiple processors, or a file format issue. If you are running our 32bit version, your memory usage is limited to 4G.  If you have more than 4G of RAM and are running a 64bit OS, using our 64bit version may resolve your issue.  If you are using multiple processors, try running the command with processors=1, the more processors you use the more memory is required. Also, you may be able to reduce the size of your dataset by using the commands outlined in the Schloss SOP, http://www.mothur.org/wiki/Schloss_SOP. If you are uable to resolve the issue, please contact Pat Schloss at mothur.bugs@gmail.com, and be sure to include the mothur.logFile with your inquiry.");
         }
index 1239beb1a2a9f61d3f8c2cec178161fb36a7b865..9334a7fdab7474db0cfb93a84562896eabd3c0de 100644 (file)
@@ -103,6 +103,98 @@ void NeedlemanOverlap::align(string A, string B){
        }
 
 }
+/**************************************************************************************************/
+
+void NeedlemanOverlap::alignPrimer(string A, string B){
+       try {
+        
+               seqA = ' ' + A; lA = seqA.length();             //      algorithm requires a dummy space at the beginning of each string
+               seqB = ' ' + B; lB = seqB.length();             //      algorithm requires a dummy space at the beginning of each string
+        
+               if (lA > nRows) { m->mothurOut("One of your candidate sequences is longer than you longest template sequence. Your longest template sequence is " + toString(nRows) + ". Your candidate is " + toString(lA) + "."); m->mothurOutEndLine();  }
+               
+               for(int i=1;i<lB;i++){                                  //      This code was largely translated from Perl code provided in Ex 3.1
+            
+                       for(int j=1;j<lA;j++){                          //      of the O'Reilly BLAST book.  I found that the example output had a
+                
+                               //      number of errors
+                               float diagonal;
+                               if(isEquivalent(seqB[i],seqA[j]))       {       diagonal = alignment[i-1][j-1].cValue + match;          }
+                               else                                {   diagonal = alignment[i-1][j-1].cValue + mismatch;       }
+                
+                               float up        = alignment[i-1][j].cValue + gap;
+                               float left      = alignment[i][j-1].cValue + gap;
+                               
+                               if(diagonal >= up){
+                                       if(diagonal >= left){
+                                               alignment[i][j].cValue = diagonal;
+                                               alignment[i][j].prevCell = 'd';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = left;
+                                               alignment[i][j].prevCell = 'l';
+                                       }
+                               }
+                               else{
+                                       if(up >= left){
+                                               alignment[i][j].cValue = up;
+                                               alignment[i][j].prevCell = 'u';
+                                       }
+                                       else{
+                                               alignment[i][j].cValue = left;
+                                               alignment[i][j].prevCell = 'l';
+                                       }
+                               }
+                       }
+               }
+        
+               Overlap over;
+               over.setOverlap(alignment, lA, lB, 0);          //      Fix gaps at the beginning and end of the sequences
+               traceBack();                                                            //      Traceback the alignment to populate seqAaln and seqBaln
+        
+       }
+       catch(exception& e) {
+               m->errorOut(e, "NeedlemanOverlap", "align");
+               exit(1);
+       }
+    
+}
+//********************************************************************/
+bool NeedlemanOverlap::isEquivalent(char oligo, char seq){
+       try {
+               
+        bool same = true;
+                                       
+        oligo = toupper(oligo);
+        seq = toupper(seq);
+        
+        if(oligo != seq){
+            if(oligo == 'A' && (seq != 'A' && seq != 'M' && seq != 'R' && seq != 'W' && seq != 'D' && seq != 'H' && seq != 'V'))       {       same = false;   }
+            else if(oligo == 'C' && (seq != 'C' && seq != 'Y' && seq != 'M' && seq != 'S' && seq != 'B' && seq != 'H' && seq != 'V'))       {  same = false;   }
+            else if(oligo == 'G' && (seq != 'G' && seq != 'R' && seq != 'K' && seq != 'S' && seq != 'B' && seq != 'D' && seq != 'V'))       {  same = false;   }
+            else if(oligo == 'T' && (seq != 'T' && seq != 'Y' && seq != 'K' && seq != 'W' && seq != 'B' && seq != 'D' && seq != 'H'))       {  same = false;   }
+            else if((oligo == '.' || oligo == '-'))           {        same = false;   }
+            else if((oligo == 'N' || oligo == 'I') && (seq == 'N'))                         {  same = false;   }
+            else if(oligo == 'R' && (seq != 'A' && seq != 'G'))                        {       same = false;   }
+            else if(oligo == 'Y' && (seq != 'C' && seq != 'T'))                        {       same = false;   }
+            else if(oligo == 'M' && (seq != 'C' && seq != 'A'))                        {       same = false;   }
+            else if(oligo == 'K' && (seq != 'T' && seq != 'G'))                        {       same = false;   }
+            else if(oligo == 'W' && (seq != 'T' && seq != 'A'))                        {       same = false;   }
+            else if(oligo == 'S' && (seq != 'C' && seq != 'G'))                        {       same = false;   }
+            else if(oligo == 'B' && (seq != 'C' && seq != 'T' && seq != 'G'))       {  same = false;   }
+            else if(oligo == 'D' && (seq != 'A' && seq != 'T' && seq != 'G'))       {  same = false;   }
+            else if(oligo == 'H' && (seq != 'A' && seq != 'T' && seq != 'C'))       {  same = false;   }
+            else if(oligo == 'V' && (seq != 'A' && seq != 'C' && seq != 'G'))       {  same = false;   }
+        }
 
+               
+               
+               return same;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "countDiffs");
+               exit(1);
+       }
+}
 /**************************************************************************************************/
 
index ea7bee515bff32685f03b195506fa4416814dddf..858e70a56c811ceef39e51878b7e68a3e0816840 100644 (file)
@@ -32,12 +32,13 @@ public:
        NeedlemanOverlap(float, float, float, int);
        ~NeedlemanOverlap();
        void align(string, string);
-
+    void alignPrimer(string, string);
        
 private:       
        float gap;
        float match;
        float mismatch;
+    bool isEquivalent(char, char);
 };
 
 /**************************************************************************************************/
index 7be805f0083fca6819703f487d438257674d03df..12dd2d2556fcef245794ff178171920f7f5f0b37 100644 (file)
@@ -329,7 +329,7 @@ static DWORD WINAPI MyPreclusterThreadFunction(LPVOID lpParam){
                                 if (mismatch > pDataArray->diffs) { mismatch = length; break; } //to far to cluster
                             }
                             
-                            if (mismatch <= diffs) {
+                            if (mismatch <= pDataArray->diffs) {
                                 //merge
                                 alignSeqs[j].names += ',' + alignSeqs[i].names;
                                 alignSeqs[j].numIdentical += alignSeqs[i].numIdentical;
index 55a8dc4a4284d07aef4211504e648be50a6cbc78..9fa19aba00ceb68353f32936c21962e14791a173 100644 (file)
@@ -54,4 +54,4 @@ int RegularizedRandomForest::updateGlobalOutOfBagEstimates(DecisionTree *decisio
                m->errorOut(e, "RegularizedRandomForest", "updateGlobalOutOfBagEstimates");
                exit(1);
        }
-}
\ No newline at end of file
+}
index 30815a077c19ff7663d498223c49e73ddc268809..63157820390d18e1ef598a8bf58932964cc6f369 100644 (file)
@@ -2236,6 +2236,8 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
             double begClock = clock();
             unsigned long long begTime;
             
+            string fileNameForOutput = filenames[i];
+            
             for (int g = 0; g < theseFlowFileNames.size(); g++) {
                 
                 string flowFileName = theseFlowFileNames[g];
@@ -2269,7 +2271,7 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                 begTime = time(NULL);
                
                 
-                flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);        
+                flowDistParentFork(numFlowCells, distFileName, numUniques, mapUniqueToSeq, mapSeqToUnique, lengths, flowDataPrI, flowDataIntI);
                 
                 m->mothurOutEndLine();
                 m->mothurOut("Total time: " + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/CLOCKS_PER_SEC) + '\n');
@@ -2434,7 +2436,7 @@ int ShhherCommand::driver(vector<string> filenames, string thisCompositeFASTAFil
                        }
             
             numCompleted++;
-                       m->mothurOut("Total time to process " + flowFileName + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
+                       m->mothurOut("Total time to process " + fileNameForOutput + ":\t" + toString(time(NULL) - begTime) + '\t' + toString((clock() - begClock)/(double)CLOCKS_PER_SEC) + '\n');
                }
                
         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
index 4cff4260ad5754fb4c0347747d20d0439df3f5d6..9959443c9ffbb0c411dfcd0fc6dd6920fbe6f5c8 100644 (file)
@@ -149,7 +149,7 @@ int TrialSwap2::calc_combo (int nrows, int ncols, vector<vector<int> > &nullmatr
 {
     try {
         //need to transpose so we can compare rows (row-major order)
-        int tmpnrows = nrows;
+        //int tmpnrows = nrows;
         vector<vector<int> > tmpmatrix;
         
         vector<int> tmprow;
index a53f4e85388f0f7bbb6fda40c2074b4f555e29d3..e0f093c011e5c3ce97f62a22421e4145bded0325 100644 (file)
@@ -17,6 +17,7 @@
 TrimOligos::TrimOligos(int p, int b, int l, int s, map<string, int> pr, map<string, int> br, vector<string> r, vector<string> lk, vector<string> sp){
        try {
                m = MothurOut::getInstance();
+        paired = false;
                
                pdiffs = p;
                bdiffs = b;
@@ -71,6 +72,7 @@ TrimOligos::TrimOligos(int p, int b, int l, int s, map<int, oligosPair> pr, map<
                bdiffs = b;
         ldiffs = l;
         sdiffs = s;
+        paired = true;
                
         maxFBarcodeLength = 0;
         for(map<int,oligosPair>::iterator it=br.begin();it!=br.end();it++){
@@ -144,6 +146,7 @@ TrimOligos::TrimOligos(int p, int b, map<string, int> pr, map<string, int> br, v
                barcodes = br;
                primers = pr;
                revPrimer = r;
+        paired = false;
         
         maxFBarcodeLength = 0;
         for(map<string,int>::iterator it=barcodes.begin();it!=barcodes.end();it++){
@@ -224,7 +227,7 @@ bool TrimOligos::findForward(Sequence& seq, int& primerStart, int& primerEnd){
                         string rawChunk = rawSequence.substr(j, olength+pdiffs);
                         
                         //use needleman to align first primer.length()+numdiffs of sequence to each barcode
-                        alignment->align(oligo, rawChunk);
+                        alignment->alignPrimer(oligo, rawChunk);
                         oligo = alignment->getSeqAAln();
                         string temp = alignment->getSeqBAln();
                                
@@ -302,6 +305,8 @@ bool TrimOligos::findReverse(Sequence& seq, int& primerStart, int& primerEnd){
 int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
        try {
                
+        if (paired) {  int success = stripPairedBarcode(seq, qual, group);  return success; }
+        
                string rawSequence = seq.getUnaligned();
                int success = bdiffs + 1;       //guilty until proven innocent
                
@@ -350,7 +355,7 @@ int TrimOligos::stripBarcode(Sequence& seq, QualityScores& qual, int& group){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -473,7 +478,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                                }
                                //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+                               alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                
@@ -536,7 +541,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     }
                     
                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
                     oligo = alignment->getSeqAAln();
                     string temp = alignment->getSeqBAln();
                     
@@ -700,7 +705,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                                }
                                //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+bdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
+                               alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                 
@@ -763,7 +768,7 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     }
                     
                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
                     oligo = alignment->getSeqAAln();
                     string temp = alignment->getSeqBAln();
                     
@@ -858,6 +863,484 @@ int TrimOligos::stripBarcode(Sequence& forwardSeq, Sequence& reverseSeq, Quality
        }
        
 }
+//*******************************************************************/
+int TrimOligos::stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group){
+       try {
+               //look for forward barcode
+               string rawSeq = seq.getUnaligned();
+               int success = bdiffs + 1;       //guilty until proven innocent
+               //cout << seq.getName() << endl;
+               //can you find the forward barcode
+               for(map<int,oligosPair>::iterator it=ipbarcodes.begin();it!=ipbarcodes.end();it++){
+                       string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+            //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+                       if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
+                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;
+                       }
+            if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
+                               success = bdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;
+                       }
+                       
+                       if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+                               group = it->first;
+                string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+                seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+                if(qual.getName() != ""){
+                    qual.trimQScores(-1, rawSeq.length()-roligo.length());
+                    qual.trimQScores(foligo.length(), -1);
+                }
+                               success = 0;
+                               break;
+                       }
+               }
+               //cout << "success=" << success << endl;
+               //if you found the barcode or if you don't want to allow for diffs
+               if ((bdiffs == 0) || (success == 0)) { return success;  }
+               else { //try aligning and see if you can find it
+                       Alignment* alignment;
+                       if (ifbarcodes.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFBarcodeLength+bdiffs+1));   }
+                       else{ alignment = NULL; }
+                       
+                       //can you find the barcode
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       vector< vector<int> > minFGroup;
+                       vector<int> minFPos;
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+             1   Sarah   Westcott
+             2   John    Westcott
+             3   Anna    Westcott
+             4   Sarah   Schloss
+             5   Pat     Schloss
+             6   Gail    Brown
+             7   Pat     Moore
+             
+             only want to look for forward = Sarah, John, Anna, Pat, Gail
+             reverse = Westcott, Schloss, Brown, Moore
+             
+             but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;
+                       for(map<string, vector<int> >::iterator it=ifbarcodes.begin();it!=ifbarcodes.end();it++){
+                               string oligo = it->first;
+                               
+                               if(rawSeq.length() < maxFBarcodeLength){        //let's just assume that the barcodes are the same length
+                                       success = bdiffs + 10;
+                                       break;
+                               }
+                               //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                               alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+bdiffs));
+                               oligo = alignment->getSeqAAln();
+                               string temp = alignment->getSeqBAln();
+                
+                               int alnLength = oligo.length();
+                               
+                               for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                int numDiff = countDiffs(oligo, temp);
+                               
+                if (alnLength == 0) { numDiff = bdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
+                               if(numDiff < minDiff){
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                                       int tempminFPos = 0;
+                    minFPos.clear();
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       tempminFPos++;
+                                               }
+                                       }
+                    minFPos.push_back(tempminFPos);
+                               }else if(numDiff == minDiff){
+                                       minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       tempminFPos++;
+                                               }
+                                       }
+                    minFPos.push_back(tempminFPos);
+                               }
+                       }
+            
+                       //cout << minDiff << '\t' << minCount << '\t' << endl;
+                       if(minDiff > bdiffs)    {       success = minDiff;              }       //no good matches
+                       else{
+                //check for reverse match
+                if (alignment != NULL) {  delete alignment;  }
+                
+                if (irbarcodes.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRBarcodeLength+bdiffs+1));   }
+                else{ alignment = NULL; }
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
+                
+                string rawRSequence = reverseOligo(seq.getUnaligned());
+                //cout << irbarcodes.size() << '\t' << maxRBarcodeLength <<  endl;
+                for(map<string, vector<int> >::iterator it=irbarcodes.begin();it!=irbarcodes.end();it++){
+                    string oligo = reverseOligo(it->first);
+                    //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+bdiffs)) << endl;
+                    if(rawRSequence.length() < maxRBarcodeLength){     //let's just assume that the barcodes are the same length
+                        success = bdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+bdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = bdiffs + 100; }
+                    
+                    //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                    }else if(numDiff == minDiff){
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                        minRGroup.push_back(it->second);
+                    }
+                    
+                }
+                
+                /*cout << minDiff << '\t' << minCount << '\t' << endl;
+                 for (int i = 0; i < minFGroup.size(); i++) {
+                 cout << i << '\t';
+                 for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;
+                 for (int i = 0; i < minRGroup.size(); i++) {
+                 cout << i << '\t';
+                 for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;*/
+                if(minDiff > bdiffs)   {       success = minDiff;              }       //no good matches
+                else  {
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) {
+                        foundMatch = true;
+                        group = matches[0];
+                        fStart = minFPos[0];
+                        rStart = rawSeq.length() - minRPos[0];
+                    }
+                    
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (foundMatch) {
+                        string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+                        seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+                        if(qual.getName() != ""){
+                            qual.trimQScores(-1, rStart);
+                            qual.trimQScores(fStart, -1);
+                        }
+                        success = minDiff;
+                        //cout << "barcode = " << ipbarcodes[group].forward << '\t' << ipbarcodes[group].reverse << endl;
+                    }else { success = bdiffs + 100;    }
+                }
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+               }
+               
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripPairedBarcode");
+               exit(1);
+       }
+       
+}
+//*******************************************************************/
+int TrimOligos::stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
+       try {
+               //look for forward 
+               string rawSeq = seq.getUnaligned();
+               int success = pdiffs + 1;       //guilty until proven innocent
+        //cout << seq.getName() << endl;
+               //can you find the forward
+               for(map<int,oligosPair>::iterator it=ipprimers.begin();it!=ipprimers.end();it++){
+                       string foligo = it->second.forward;
+            string roligo = it->second.reverse;
+            
+            //cout << it->first << '\t' << foligo << '\t' << roligo << endl;
+            //cout << it->first << '\t' << rawSeq.substr(0,foligo.length()) << '\t' << rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length()) << endl;
+                       if(rawSeq.length() < foligo.length()){  //let's just assume that the barcodes are the same length
+                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;
+                       }
+            if(rawSeq.length() < roligo.length()){     //let's just assume that the barcodes are the same length
+                               success = pdiffs + 10;                                  //if the sequence is shorter than the barcode then bail out
+                               break;
+                       }
+                       
+                       if((compareDNASeq(foligo, rawSeq.substr(0,foligo.length()))) && (compareDNASeq(roligo, rawSeq.substr(rawSeq.length()-roligo.length(),roligo.length())))) {
+                               group = it->first;
+                if (!keepForward) { 
+                    string trimmedSeq = rawSeq.substr(foligo.length()); //trim forward barcode
+                    seq.setUnaligned(trimmedSeq.substr(0,(trimmedSeq.length()-roligo.length()))); //trim reverse barcode
+                    if(qual.getName() != ""){
+                        qual.trimQScores(-1, rawSeq.length()-roligo.length());
+                        qual.trimQScores(foligo.length(), -1);
+                    }
+                }
+                               success = 0;
+                               break;
+                       }
+               }
+               //cout << "success=" << success << endl;
+               //if you found the barcode or if you don't want to allow for diffs
+               if ((pdiffs == 0) || (success == 0)) { return success;  }
+               else { //try aligning and see if you can find it
+                       Alignment* alignment;
+                       if (ifprimers.size() > 0) {  alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxFPrimerLength+pdiffs+1));   }
+                       else{ alignment = NULL; }
+                       
+                       //can you find the barcode
+                       int minDiff = 1e6;
+                       int minCount = 1;
+                       vector< vector<int> > minFGroup;
+                       vector<int> minFPos;
+            
+            //the pair can have duplicates, but we only want to search for the unique forward and reverses, example
+            /*
+             1   Sarah   Westcott
+             2   John    Westcott
+             3   Anna    Westcott
+             4   Sarah   Schloss
+             5   Pat     Schloss
+             6   Gail    Brown
+             7   Pat     Moore
+             
+             only want to look for forward = Sarah, John, Anna, Pat, Gail
+             reverse = Westcott, Schloss, Brown, Moore
+             
+             but if best match forward = 4, and reverse = 1, we want to count as a valid match because forward 1 and forward 4 are the same.  so both barcodes map to same group.
+             */
+            //cout << endl << forwardSeq.getName() << endl;
+                       for(map<string, vector<int> >::iterator it=ifprimers.begin();it!=ifprimers.end();it++){
+                               string oligo = it->first;
+                               
+                               if(rawSeq.length() < maxFPrimerLength){ //let's just assume that the barcodes are the same length
+                                       success = pdiffs + 10;
+                                       break;
+                               }
+                               //cout << "before = " << oligo << '\t' << rawSeq.substr(0,oligo.length()+bdiffs) << endl;
+                               //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                               alignment->alignPrimer(oligo, rawSeq.substr(0,oligo.length()+pdiffs));
+                               oligo = alignment->getSeqAAln();
+                               string temp = alignment->getSeqBAln();
+                
+                               int alnLength = oligo.length();
+                               
+                               for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){      alnLength = i+1;        break;  } }
+                               oligo = oligo.substr(0,alnLength);
+                               temp = temp.substr(0,alnLength);
+                int numDiff = countDiffs(oligo, temp);
+                               
+                if (alnLength == 0) { numDiff = pdiffs + 100; }
+                //cout << "after = " << oligo << '\t' << temp << '\t' << numDiff << endl;
+                
+                               if(numDiff < minDiff){
+                                       minDiff = numDiff;
+                                       minCount = 1;
+                                       minFGroup.clear();
+                    minFGroup.push_back(it->second);
+                                       int tempminFPos = 0;
+                    minFPos.clear();
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       tempminFPos++;
+                                               }
+                                       }
+                    minFPos.push_back(tempminFPos);
+                               }else if(numDiff == minDiff){
+                                       minFGroup.push_back(it->second);
+                    int tempminFPos = 0;
+                                       for(int i=0;i<alnLength;i++){
+                                               if(temp[i] != '-'){
+                                                       tempminFPos++;
+                                               }
+                                       }
+                    minFPos.push_back(tempminFPos);
+                               }
+                       }
+            
+                       //cout << minDiff << '\t' << minCount << '\t' << endl;
+                       if(minDiff > pdiffs)    {       success = minDiff;              }       //no good matches
+                       else{
+                //check for reverse match
+                if (alignment != NULL) {  delete alignment;  }
+                
+                if (irprimers.size() > 0) { alignment = new NeedlemanOverlap(-1.0, 1.0, -1.0, (maxRPrimerLength+pdiffs+1));   }
+                else{ alignment = NULL; }
+                
+                //can you find the barcode
+                minDiff = 1e6;
+                minCount = 1;
+                vector< vector<int> > minRGroup;
+                vector<int> minRPos;
+                
+                string rawRSequence = reverseOligo(seq.getUnaligned());
+                
+                for(map<string, vector<int> >::iterator it=irprimers.begin();it!=irprimers.end();it++){
+                    string oligo = reverseOligo(it->first);
+                    //cout << "r before = " << reverseOligo(oligo) << '\t' << reverseOligo(rawRSequence.substr(0,oligo.length()+pdiffs)) << endl;
+                    if(rawRSequence.length() < maxRPrimerLength){      //let's just assume that the barcodes are the same length
+                        success = pdiffs + 10;
+                        break;
+                    }
+                    
+                    //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+                    oligo = alignment->getSeqAAln();
+                    string temp = alignment->getSeqBAln();
+                    
+                    int alnLength = oligo.length();
+                    for(int i=oligo.length()-1;i>=0;i--){ if(oligo[i] != '-'){ alnLength = i+1;        break;  } }
+                    oligo = oligo.substr(0,alnLength);
+                    temp = temp.substr(0,alnLength);
+                    int numDiff = countDiffs(oligo, temp);
+                    if (alnLength == 0) { numDiff = pdiffs + 100; }
+                    
+                    //cout << "r after = " << reverseOligo(oligo) << '\t' << reverseOligo(temp) << '\t' << numDiff << endl;
+                    if(numDiff < minDiff){
+                        minDiff = numDiff;
+                        minCount = 1;
+                        minRGroup.clear();
+                        minRGroup.push_back(it->second);
+                        int tempminRPos = 0;
+                        minRPos.clear();
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                    }else if(numDiff == minDiff){
+                        int tempminRPos = 0;
+                        for(int i=0;i<alnLength;i++){
+                            if(temp[i] != '-'){
+                                tempminRPos++;
+                            }
+                        }
+                        minRPos.push_back(tempminRPos);
+                        minRGroup.push_back(it->second);
+                    }
+                    
+                }
+                
+                /*cout << minDiff << '\t' << minCount << '\t' << endl;
+                 for (int i = 0; i < minFGroup.size(); i++) {
+                 cout << i << '\t';
+                 for (int j = 0; j < minFGroup[i].size(); j++) {  cout << minFGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;
+                 for (int i = 0; i < minRGroup.size(); i++) {
+                 cout << i << '\t';
+                 for (int j = 0; j < minRGroup[i].size(); j++) {  cout << minRGroup[i][j] << " "; }
+                 cout << endl;
+                 }
+                 cout << endl;*/
+                if(minDiff > pdiffs)   {       success = minDiff;              }       //no good matches
+                else  {
+                    bool foundMatch = false;
+                    vector<int> matches;
+                    for (int i = 0; i < minFGroup.size(); i++) {
+                        for (int j = 0; j < minFGroup[i].size(); j++) {
+                            for (int k = 0; k < minRGroup.size(); k++) {
+                                if (m->inUsersGroups(minFGroup[i][j], minRGroup[k])) { matches.push_back(minFGroup[i][j]); k+= minRGroup.size(); }
+                            }
+                        }
+                    }
+                    
+                    int fStart = 0;
+                    int rStart = 0;
+                    if (matches.size() == 1) {
+                        foundMatch = true;
+                        group = matches[0];
+                        fStart = minFPos[0];
+                        rStart = rawSeq.length() - minRPos[0];
+                    }
+                    
+                    //we have an acceptable match for the forward and reverse, but do they match?
+                    if (foundMatch) {
+                        if (!keepForward) {
+                            string trimmedSeq = rawSeq.substr(0, rStart); //trim reverse barcode
+                            seq.setUnaligned(trimmedSeq.substr(fStart)); //trim forward barcode
+                            if(qual.getName() != ""){
+                                qual.trimQScores(-1, rStart);
+                                qual.trimQScores(fStart, -1);
+                            }
+                        }
+                        success = minDiff;
+                        //cout << "primer = " << ipprimers[group].forward << '\t' << ipprimers[group].reverse << endl;
+                    }else { success = pdiffs + 100;    }
+                }
+                       }
+                       
+                       if (alignment != NULL) {  delete alignment;  }
+               }
+               
+               return success;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "stripPairedPrimers");
+               exit(1);
+       }
+       
+}
+
 //*******************************************************************/
 int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, QualityScores& forwardQual, QualityScores& reverseQual, int& group){
        try {
@@ -929,7 +1412,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                                }
                                //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+                               alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                 
@@ -992,7 +1475,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, Quality
                     }
                     
                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
                     oligo = alignment->getSeqAAln();
                     string temp = alignment->getSeqBAln();
                     
@@ -1156,7 +1639,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                                }
                                //cout << "before = " << oligo << '\t' << rawFSequence.substr(0,oligo.length()+pdiffs) << endl;
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
+                               alignment->alignPrimer(oligo, rawFSequence.substr(0,oligo.length()+pdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                 
@@ -1219,7 +1702,7 @@ int TrimOligos::stripForward(Sequence& forwardSeq, Sequence& reverseSeq, int& gr
                     }
                     
                     //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                    alignment->align(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
+                    alignment->alignPrimer(oligo, rawRSequence.substr(0,oligo.length()+pdiffs));
                     oligo = alignment->getSeqAAln();
                     string temp = alignment->getSeqBAln();
                     
@@ -1360,7 +1843,7 @@ int TrimOligos::stripBarcode(Sequence& seq, int& group){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+bdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                 
@@ -1460,7 +1943,7 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -1514,6 +1997,9 @@ int TrimOligos::stripForward(Sequence& seq, int& group){
 //*******************************************************************/
 int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, bool keepForward){
        try {
+        
+        if (paired) {  int success = stripPairedPrimers(seq, qual, group, keepForward);  return success; }
+        
                string rawSequence = seq.getUnaligned();
                int success = pdiffs + 1;       //guilty until proven innocent
                
@@ -1560,7 +2046,7 @@ int TrimOligos::stripForward(Sequence& seq, QualityScores& qual, int& group, boo
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+pdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -1722,7 +2208,7 @@ bool TrimOligos::stripLinker(Sequence& seq, QualityScores& qual){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -1821,7 +2307,7 @@ bool TrimOligos::stripLinker(Sequence& seq){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+ldiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -1918,7 +2404,7 @@ bool TrimOligos::stripSpacer(Sequence& seq, QualityScores& qual){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -2017,7 +2503,7 @@ bool TrimOligos::stripSpacer(Sequence& seq){
                                }
                                
                                //use needleman to align first barcode.length()+numdiffs of sequence to each barcode
-                               alignment->align(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
+                               alignment->alignPrimer(oligo, rawSequence.substr(0,oligo.length()+sdiffs));
                                oligo = alignment->getSeqAAln();
                                string temp = alignment->getSeqBAln();
                                
@@ -2137,6 +2623,48 @@ int TrimOligos::countDiffs(string oligo, string seq){
                exit(1);
        }
 }
+//********************************************************************/
+string TrimOligos::reverseOligo(string oligo){
+       try {
+        string reverse = "";
+        
+        for(int i=oligo.length()-1;i>=0;i--){
+            
+            if(oligo[i] == 'A')                {       reverse += 'T'; }
+            else if(oligo[i] == 'T'){  reverse += 'A'; }
+            else if(oligo[i] == 'U'){  reverse += 'A'; }
+            
+            else if(oligo[i] == 'G'){  reverse += 'C'; }
+            else if(oligo[i] == 'C'){  reverse += 'G'; }
+            
+            else if(oligo[i] == 'R'){  reverse += 'Y'; }
+            else if(oligo[i] == 'Y'){  reverse += 'R'; }
+            
+            else if(oligo[i] == 'M'){  reverse += 'K'; }
+            else if(oligo[i] == 'K'){  reverse += 'M'; }
+            
+            else if(oligo[i] == 'W'){  reverse += 'W'; }
+            else if(oligo[i] == 'S'){  reverse += 'S'; }
+            
+            else if(oligo[i] == 'B'){  reverse += 'V'; }
+            else if(oligo[i] == 'V'){  reverse += 'B'; }
+            
+            else if(oligo[i] == 'D'){  reverse += 'H'; }
+            else if(oligo[i] == 'H'){  reverse += 'D'; }
+            
+            else if(oligo[i] == '-'){  reverse += '-'; } 
+            else                                               {       reverse += 'N'; }
+        }
+        
+        
+        return reverse;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "TrimOligos", "reverseOligo");
+               exit(1);
+       }
+}
+
 /********************************************************************/
 
 
index d72b8146d36bc659ec13ddffc28463ef4a7b8204..df4643c87f1d5b7edd75c5384e7ff2a144a392c7 100644 (file)
@@ -55,10 +55,12 @@ class TrimOligos {
         bool findForward(Sequence&, int&, int&);
         bool findReverse(Sequence&, int&, int&);
     
+        string reverseOligo(string);
                                
        
        private:
                int pdiffs, bdiffs, ldiffs, sdiffs;
+        bool paired;
        
                map<string, int> barcodes;
                map<string, int> primers;
@@ -77,7 +79,10 @@ class TrimOligos {
                MothurOut* m;
        
                bool compareDNASeq(string, string);                             
-               int countDiffs(string, string);                 
+               int countDiffs(string, string);
+        
+        int stripPairedBarcode(Sequence& seq, QualityScores& qual, int& group);
+        int stripPairedPrimers(Sequence& seq, QualityScores& qual, int& group, bool);
 };
 
 #endif
index b208209e2a808a0f92cca9f160f4da456450e153..709202f7d690c17e8a6edb7f5a7812047aec9177 100644 (file)
@@ -21,6 +21,7 @@ vector<string> TrimSeqsCommand::setParameters(){
                CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none","name",false,false,true); parameters.push_back(pname);
         CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none","count",false,false,true); parameters.push_back(pcount);
                CommandParameter pflip("flip", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(pflip);
+        CommandParameter preorient("checkorient", "Boolean", "", "F", "", "", "","",false,false,true); parameters.push_back(preorient);
                CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
                CommandParameter pmaxhomop("maxhomop", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pmaxhomop);
                CommandParameter pminlength("minlength", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pminlength);
@@ -60,9 +61,10 @@ string TrimSeqsCommand::getHelpString(){
                string helpString = "";
                helpString += "The trim.seqs command reads a fastaFile and creates 2 new fasta files, .trim.fasta and scrap.fasta, as well as group files if you provide and oligos file.\n";
                helpString += "The .trim.fasta contains sequences that meet your requirements, and the .scrap.fasta contains those which don't.\n";
-               helpString += "The trim.seqs command parameters are fasta, name, count, flip, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
+               helpString += "The trim.seqs command parameters are fasta, name, count, flip, checkorient, oligos, maxambig, maxhomop, minlength, maxlength, qfile, qthreshold, qaverage, diffs, qtrim, keepfirst, removelast and allfiles.\n";
                helpString += "The fasta parameter is required.\n";
                helpString += "The flip parameter will output the reverse compliment of your trimmed sequence. The default is false.\n";
+        helpString += "The checkorient parameter will check the reverse compliment of the sequence if the barcodes and primers cannot be found in the forward. The default is false.\n";
                helpString += "The oligos parameter allows you to provide an oligos file.\n";
                helpString += "The name parameter allows you to provide a names file with your fasta file.\n";
         helpString += "The count parameter allows you to provide a count file with your fasta file.\n";
@@ -123,7 +125,7 @@ string TrimSeqsCommand::getOutputPattern(string type) {
 
 TrimSeqsCommand::TrimSeqsCommand(){    
        try {
-               abort = true; calledHelp = true; 
+               abort = true; calledHelp = true;
                setParameters();
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
@@ -326,6 +328,9 @@ TrimSeqsCommand::TrimSeqsCommand(string option)  {
             
             temp = validParameter.validFile(parameters, "keepforward", false);         if (temp == "not found") { temp = "F"; }
                        keepforward = m->isTrue(temp);
+            
+            temp = validParameter.validFile(parameters, "checkorient", false);         if (temp == "not found") { temp = "F"; }
+                       reorient = m->isTrue(temp);
                        
                        temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
                        m->setProcessors(temp);
@@ -366,6 +371,7 @@ int TrimSeqsCommand::execute(){
        
                if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
                
+        pairedOligos = false;
                numFPrimers = 0;  //this needs to be initialized
                numRPrimers = 0;
         numSpacers = 0;
@@ -438,6 +444,8 @@ int TrimSeqsCommand::execute(){
                        }
                }
        
+        if (m->control_pressed) { return 0; }
+            
         //fills lines and qlines
                setLines(fastaFile, qFileName);
                
@@ -666,11 +674,34 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                
                int count = 0;
                bool moreSeqs = 1;
-               TrimOligos trimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);
-       
+        int numBarcodes = barcodes.size();
+               TrimOligos* trimOligos = NULL;
+        if (pairedOligos)   {   trimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, pairedPrimers, pairedBarcodes); numBarcodes = pairedBarcodes.size(); }
+        else                {   trimOligos = new TrimOligos(pdiffs, bdiffs, ldiffs, sdiffs, primers, barcodes, revPrimer, linker, spacer);  }
+        
+        TrimOligos* rtrimOligos = NULL;
+        if (reorient) {
+            //create reoriented primer and barcode pairs
+            map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
+            for (map<int, oligosPair>::iterator it = pairedPrimers.begin(); it != pairedPrimers.end(); it++) {
+                cout << "primer " << (it->second).forward << '\t' << (it->second).reverse << '\t' << primerNameVector[it->first] << endl;
+                cout << "rprimer " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
+                 oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+                rpairedPrimers[it->first] = tempPair;
+            }
+            for (map<int, oligosPair>::iterator it = pairedBarcodes.begin(); it != pairedBarcodes.end(); it++) {
+                cout << "barcode " << (it->second).forward << '\t' << (it->second).reverse << '\t' << barcodeNameVector[it->first] << endl;
+                cout << "rbarcode " << trimOligos->reverseOligo((it->second).reverse) << '\t' << (trimOligos->reverseOligo((it->second).forward)) << endl;
+                oligosPair tempPair(reverseOligo((it->second).reverse), (reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[it->first] = tempPair;
+            }
+            rtrimOligos = new TrimOligos(pdiffs, bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+        }
+        
                while (moreSeqs) {
                                
-                       if (m->control_pressed) { 
+                       if (m->control_pressed) {
+                delete trimOligos; if (reorient) { delete rtrimOligos; }
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                if ((createGroup) && (countfile == "")) {        outGroupsFile.close();   }
                 if(qFileName != "")    {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
@@ -699,27 +730,27 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                int primerIndex = 0;
                                
                 if(numLinkers != 0){
-                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       success = trimOligos->stripLinker(currSeq, currQual);
                                        if(success > ldiffs)            {       trashCode += 'k';       }
                                        else{ currentSeqsDiffs += success;  }
 
                                }
                 
-                               if(barcodes.size() != 0){
-                                       success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+                               if(numBarcodes != 0){
+                                       success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
                                        if(success > bdiffs)            {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                                
                 if(numSpacers != 0){
-                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       success = trimOligos->stripSpacer(currSeq, currQual);
                                        if(success > sdiffs)            {       trashCode += 's';       }
                                        else{ currentSeqsDiffs += success;  }
 
                                }
                 
                                if(numFPrimers != 0){
-                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, keepforward);
+                                       success = trimOligos->stripForward(currSeq, currQual, primerIndex, keepforward);
                                        if(success > pdiffs)            {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
@@ -727,10 +758,45 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                if (currentSeqsDiffs > tdiffs)  {       trashCode += 't';   }
                                
                                if(numRPrimers != 0){
-                                       success = trimOligos.stripReverse(currSeq, currQual);
+                                       success = trimOligos->stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
-
+                
+                if (reorient && (trashCode != "")) { //if you failed and want to check the reverse
+                    int thisSuccess = 0;
+                    string thisTrashCode = "";
+                    int thisCurrentSeqsDiffs = 0;
+                    
+                    int thisBarcodeIndex = 0;
+                    int thisPrimerIndex = 0;
+                    
+                    if(numBarcodes != 0){
+                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        if(thisSuccess > bdiffs)               {       thisTrashCode += 'b';   }
+                        else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                    }
+                    
+                    if(numFPrimers != 0){
+                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, keepforward);
+                        if(thisSuccess > pdiffs)               {       thisTrashCode += 'f';   }
+                        else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                    }
+                    
+                    if (thisCurrentSeqsDiffs > tdiffs) {       thisTrashCode += 't';   }
+                    
+                    if (thisTrashCode == "") { 
+                        trashCode = thisTrashCode;
+                        success = thisSuccess;
+                        currentSeqsDiffs = thisCurrentSeqsDiffs;
+                        barcodeIndex = thisBarcodeIndex;
+                        primerIndex = thisPrimerIndex;
+                        currSeq.reverseComplement();
+                        if(qFileName != ""){
+                            currQual.flipQScores();
+                        }
+                    }
+                }
+                
                                if(keepFirst != 0){
                                        success = keepFirstTrim(currSeq, currQual);
                                }
@@ -781,9 +847,9 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                if(trashCode.length() == 0){
                     string thisGroup = "";
                     if (createGroup) {
-                                               if(barcodes.size() != 0){
+                                               if(numBarcodes != 0){
                                                        thisGroup = barcodeNameVector[barcodeIndex];
-                                                       if (primers.size() != 0) { 
+                                                       if (numFPrimers != 0) {
                                                                if (primerNameVector[primerIndex] != "") { 
                                                                        if(thisGroup != "") {
                                                                                thisGroup += "." + primerNameVector[primerIndex]; 
@@ -821,7 +887,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                         }
                         
                         if (createGroup) {
-                            if(barcodes.size() != 0){
+                            if(numBarcodes != 0){
                                                                 
                                 if (m->debug) { m->mothurOut(", group= " + thisGroup + "\n"); }
                                 
@@ -909,7 +975,8 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                //report progress
                if((count) % 1000 != 0){        m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                
-               
+               delete trimOligos;
+        if (reorient) { delete rtrimOligos; }
                inFASTA.close();
                trimFASTAFile.close();
                scrapFASTAFile.close();
@@ -1097,10 +1164,10 @@ int TrimSeqsCommand::createProcessesCreateTrim(string filename, string qFileName
                                               tempPrimerQualFileNames,
                                               tempNameFileNames,
                                               lines[h].start, lines[h].end, qLines[h].start, qLines[h].end, m,
-                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, 
+                                              pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, primers, barcodes, revPrimer, linker, spacer, pairedBarcodes, pairedPrimers, pairedOligos,
                                              primerNameVector, barcodeNameVector, createGroup, allFiles, keepforward, keepFirst, removeLast,
                                               qWindowStep, qWindowSize, qWindowAverage, qtrim, qThreshold, qAverage, qRollAverage,
-                                             minLength, maxAmbig, maxHomoP, maxLength, flip, nameMap, nameCount);
+                                             minLength, maxAmbig, maxHomoP, maxLength, flip, reorient, nameMap, nameCount);
                        pDataArray.push_back(tempTrim);
             
                        hThreadArray[h] = CreateThread(NULL, 0, MyTrimThreadFunction, pDataArray[h], 0, &dwThreadIdArray[h]);   
@@ -1316,6 +1383,10 @@ int TrimSeqsCommand::setLines(string filename, string qfilename) {
                         string sname = "";  nameStream >> sname;
                         sname = sname.substr(1);
                         
+                        for (int i = 0; i < sname.length(); i++) {
+                            if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
+                        }
+                        
                         map<string, int>::iterator it = firstSeqNames.find(sname);
                         
                         if(it != firstSeqNames.end()) { //this is the start of a new chunk
@@ -1414,10 +1485,15 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                
                ofstream test;
                
-               string type, oligo, group;
+               string type, oligo, roligo, group;
+        bool hasPrimer = false; bool hasPairedBarcodes = false;
 
                int indexPrimer = 0;
                int indexBarcode = 0;
+        int indexPairedPrimer = 0;
+               int indexPairedBarcode = 0;
+        set<string> uniquePrimers;
+        set<string> uniqueBarcodes;
                
                while(!inOligos.eof()){
 
@@ -1463,6 +1539,40 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                        primers[oligo]=indexPrimer; indexPrimer++;              
                                        primerNameVector.push_back(group);
                                }
+                else if (type == "PRIMER"){
+                    m->gobble(inOligos);
+                                       
+                    inOligos >> roligo;
+                    
+                    for(int i=0;i<roligo.length();i++){
+                        roligo[i] = toupper(roligo[i]);
+                        if(roligo[i] == 'U')   {       roligo[i] = 'T';        }
+                    }
+                    roligo = reverseOligo(roligo);
+                    
+                    group = "";
+                    
+                                       // get rest of line in case there is a primer name
+                                       while (!inOligos.eof()) {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13){        break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  group += c;  }
+                                       }
+                    
+                    oligosPair newPrimer(oligo, roligo);
+                                       
+                                       //check for repeat barcodes
+                    string tempPair = oligo+roligo;
+                    if (uniquePrimers.count(tempPair) != 0) { m->mothurOut("primer pair " + newPrimer.forward + " " + newPrimer.reverse + " is in your oligos file already."); m->mothurOutEndLine();  }
+                    else { uniquePrimers.insert(tempPair); }
+                                       
+                    if (m->debug) {  if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer pair " + newPrimer.forward + " " + newPrimer.reverse + ".\n"); }  }
+                    
+                                       pairedPrimers[indexPairedPrimer]=newPrimer; indexPairedPrimer++;
+                                       primerNameVector.push_back(group);
+                    hasPrimer = true;
+                }
                                else if(type == "REVERSE"){
                                        //Sequence oligoRC("reverse", oligo);
                                        //oligoRC.reverseComplement();
@@ -1471,13 +1581,48 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                                }
                                else if(type == "BARCODE"){
                                        inOligos >> group;
-                                       
-                                       //check for repeat barcodes
-                                       map<string, int>::iterator itBar = barcodes.find(oligo);
-                                       if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
                     
-                                       barcodes[oligo]=indexBarcode; indexBarcode++;
-                                       barcodeNameVector.push_back(group);
+                    //barcode lines can look like   BARCODE   atgcatgc   groupName  - for 454 seqs
+                    //or                            BARCODE   atgcatgc   atgcatgc    groupName  - for illumina data that has forward and reverse info
+                    
+                    string temp = "";
+                    while (!inOligos.eof())    {
+                                               char c = inOligos.get();
+                                               if (c == 10 || c == 13){        break;  }
+                                               else if (c == 32 || c == 9){;} //space or tab
+                                               else {  temp += c;  }
+                                       }
+                                       
+                    //then this is illumina data with 4 columns
+                    if (temp != "") {
+                        hasPairedBarcodes = true;
+                        string reverseBarcode = group; //reverseOligo(group); //reverse barcode
+                        group = temp;
+                        
+                        for(int i=0;i<reverseBarcode.length();i++){
+                            reverseBarcode[i] = toupper(reverseBarcode[i]);
+                            if(reverseBarcode[i] == 'U')       {       reverseBarcode[i] = 'T';        }
+                        }
+                        
+                        oligosPair newPair(oligo, reverseOligo(reverseBarcode));
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: barcode pair " + newPair.forward + " " + newPair.reverse + ", and group = " + group + ".\n"); }
+                        
+                        //check for repeat barcodes
+                        string tempPair = oligo+reverseBarcode;
+                        if (uniqueBarcodes.count(tempPair) != 0) { m->mothurOut("barcode pair " + newPair.forward + " " + newPair.reverse +  " is in your oligos file already, disregarding."); m->mothurOutEndLine();  }
+                        else { uniqueBarcodes.insert(tempPair); }
+                        
+                        pairedBarcodes[indexPairedBarcode]=newPair; indexPairedBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }else {                            
+                        //check for repeat barcodes
+                        map<string, int>::iterator itBar = barcodes.find(oligo);
+                        if (itBar != barcodes.end()) { m->mothurOut("barcode " + oligo + " is in your oligos file already."); m->mothurOutEndLine();  }
+                        
+                        barcodes[oligo]=indexBarcode; indexBarcode++;
+                        barcodeNameVector.push_back(group);
+                    }
                                }else if(type == "LINKER"){
                                        linker.push_back(oligo);
                                }else if(type == "SPACER"){
@@ -1489,6 +1634,11 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                }       
                inOligos.close();
                
+        if (hasPairedBarcodes || hasPrimer) {
+            pairedOligos = true;
+            if ((primers.size() != 0) || (barcodes.size() != 0) || (linker.size() != 0) || (spacer.size() != 0) || (revPrimer.size() != 0)) { m->control_pressed = true;  m->mothurOut("[ERROR]: cannot mix paired primers and barcodes with non paired or linkers and spacers, quitting."); m->mothurOutEndLine();  return 0; }
+        }
+        
                if(barcodeNameVector.size() == 0 && primerNameVector[0] == ""){ allFiles = 0;   }
                
                //add in potential combos
@@ -1511,75 +1661,146 @@ bool TrimSeqsCommand::getOligos(vector<vector<string> >& fastaFileNames, vector<
                
                if(allFiles){
                        set<string> uniqueNames; //used to cleanup outputFileNames
-                       for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
-                               for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
-                                       
-                                       string primerName = primerNameVector[itPrimer->second];
-                                       string barcodeName = barcodeNameVector[itBar->second];
-                                       
-                    if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
-                                       else {
-                        string comboGroupName = "";
-                        string fastaFileName = "";
-                        string qualFileName = "";
-                        string nameFileName = "";
-                        string countFileName = "";
+            if (pairedOligos) {
+                for(map<int, oligosPair>::iterator itBar = pairedBarcodes.begin();itBar != pairedBarcodes.end();itBar++){
+                    for(map<int, oligosPair>::iterator itPrimer = pairedPrimers.begin();itPrimer != pairedPrimers.end(); itPrimer++){
                         
-                        if(primerName == ""){
-                            comboGroupName = barcodeNameVector[itBar->second];
-                        }
-                        else{
-                            if(barcodeName == ""){
-                                comboGroupName = primerNameVector[itPrimer->second];
+                        string primerName = primerNameVector[itPrimer->first];
+                        string barcodeName = barcodeNameVector[itBar->first];
+                        
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing
+                        else {
+                            string comboGroupName = "";
+                            string fastaFileName = "";
+                            string qualFileName = "";
+                            string nameFileName = "";
+                            string countFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->first];
                             }
                             else{
-                                comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->first];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->first] + "." + primerNameVector[itPrimer->first];
+                                }
+                            }
+                            
+                            
+                            ofstream temp;
+                            map<string, string> variables;
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                            variables["[tag]"] = comboGroupName;
+                            fastaFileName = getOutputFileName("fasta", variables);
+                            if (uniqueNames.count(fastaFileName) == 0) {
+                                outputNames.push_back(fastaFileName);
+                                outputTypes["fasta"].push_back(fastaFileName);
+                                uniqueNames.insert(fastaFileName);
+                            }
+                            
+                            fastaFileNames[itBar->first][itPrimer->first] = fastaFileName;
+                            m->openOutputFile(fastaFileName, temp);            temp.close();
+                            
+                            if(qFileName != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+                                qualFileName = getOutputFileName("qfile", variables);
+                                if (uniqueNames.count(qualFileName) == 0) {
+                                    outputNames.push_back(qualFileName);
+                                    outputTypes["qfile"].push_back(qualFileName);
+                                }
+                                
+                                qualFileNames[itBar->first][itPrimer->first] = qualFileName;
+                                m->openOutputFile(qualFileName, temp);         temp.close();
+                            }
+                            
+                            if(nameFile != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                                nameFileName = getOutputFileName("name", variables);
+                                if (uniqueNames.count(nameFileName) == 0) {
+                                    outputNames.push_back(nameFileName);
+                                    outputTypes["name"].push_back(nameFileName);
+                                }
+                                
+                                nameFileNames[itBar->first][itPrimer->first] = nameFileName;
+                                m->openOutputFile(nameFileName, temp);         temp.close();
                             }
                         }
+                    }
+                }
+            }else {
+                for(map<string, int>::iterator itBar = barcodes.begin();itBar != barcodes.end();itBar++){
+                    for(map<string, int>::iterator itPrimer = primers.begin();itPrimer != primers.end(); itPrimer++){
                         
+                        string primerName = primerNameVector[itPrimer->second];
+                        string barcodeName = barcodeNameVector[itBar->second];
                         
-                        ofstream temp;
-                        map<string, string> variables; 
-                        variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
-                        variables["[tag]"] = comboGroupName;
-                        fastaFileName = getOutputFileName("fasta", variables);
-                        if (uniqueNames.count(fastaFileName) == 0) {
-                            outputNames.push_back(fastaFileName);
-                            outputTypes["fasta"].push_back(fastaFileName);
-                            uniqueNames.insert(fastaFileName);
-                        }
-                        
-                        fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
-                        m->openOutputFile(fastaFileName, temp);                temp.close();
-                        
-                        if(qFileName != ""){
-                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
-                            qualFileName = getOutputFileName("qfile", variables);
-                            if (uniqueNames.count(qualFileName) == 0) {
-                                outputNames.push_back(qualFileName);
-                                outputTypes["qfile"].push_back(qualFileName);
+                        if ((primerName == "ignore") || (barcodeName == "ignore")) { } //do nothing 
+                        else {
+                            string comboGroupName = "";
+                            string fastaFileName = "";
+                            string qualFileName = "";
+                            string nameFileName = "";
+                            string countFileName = "";
+                            
+                            if(primerName == ""){
+                                comboGroupName = barcodeNameVector[itBar->second];
+                            }
+                            else{
+                                if(barcodeName == ""){
+                                    comboGroupName = primerNameVector[itPrimer->second];
+                                }
+                                else{
+                                    comboGroupName = barcodeNameVector[itBar->second] + "." + primerNameVector[itPrimer->second];
+                                }
                             }
                             
-                            qualFileNames[itBar->second][itPrimer->second] = qualFileName;
-                            m->openOutputFile(qualFileName, temp);             temp.close();
-                        }
-                        
-                        if(nameFile != ""){
-                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
-                            nameFileName = getOutputFileName("name", variables);
-                            if (uniqueNames.count(nameFileName) == 0) {
-                                outputNames.push_back(nameFileName);
-                                outputTypes["name"].push_back(nameFileName);
+                            
+                            ofstream temp;
+                            map<string, string> variables; 
+                            variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFile));
+                            variables["[tag]"] = comboGroupName;
+                            fastaFileName = getOutputFileName("fasta", variables);
+                            if (uniqueNames.count(fastaFileName) == 0) {
+                                outputNames.push_back(fastaFileName);
+                                outputTypes["fasta"].push_back(fastaFileName);
+                                uniqueNames.insert(fastaFileName);
                             }
                             
-                            nameFileNames[itBar->second][itPrimer->second] = nameFileName;
-                            m->openOutputFile(nameFileName, temp);             temp.close();
+                            fastaFileNames[itBar->second][itPrimer->second] = fastaFileName;
+                            m->openOutputFile(fastaFileName, temp);            temp.close();
+                            
+                            if(qFileName != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qFileName));
+                                qualFileName = getOutputFileName("qfile", variables);
+                                if (uniqueNames.count(qualFileName) == 0) {
+                                    outputNames.push_back(qualFileName);
+                                    outputTypes["qfile"].push_back(qualFileName);
+                                }
+                                
+                                qualFileNames[itBar->second][itPrimer->second] = qualFileName;
+                                m->openOutputFile(qualFileName, temp);         temp.close();
+                            }
+                            
+                            if(nameFile != ""){
+                                variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(nameFile));
+                                nameFileName = getOutputFileName("name", variables);
+                                if (uniqueNames.count(nameFileName) == 0) {
+                                    outputNames.push_back(nameFileName);
+                                    outputTypes["name"].push_back(nameFileName);
+                                }
+                                
+                                nameFileNames[itBar->second][itPrimer->second] = nameFileName;
+                                m->openOutputFile(nameFileName, temp);         temp.close();
+                            }
                         }
                     }
-                               }
-                       }
+                }
+            }
                }
                numFPrimers = primers.size();
+        if (pairedOligos) { numFPrimers  = pairedPrimers.size(); }
                numRPrimers = revPrimer.size();
         numLinkers = linker.size();
         numSpacers = spacer.size();
index 80e1ebe1a00e2f56ec19a6f23c3ba890af1c4f10..4d0a6112cb6e288600ae944e9340563f5973da47 100644 (file)
@@ -55,12 +55,14 @@ private:
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
-       bool flip, allFiles, qtrim, keepforward;
+       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
        vector<string> revPrimer, outputNames;
        set<string> filesToRemove;
+    map<int, oligosPair> pairedBarcodes;
+    map<int, oligosPair> pairedPrimers;
        map<string, int> barcodes;
        vector<string> groupVector;
        map<string, int> primers;
@@ -96,7 +98,7 @@ struct trimData {
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup;
+    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
        int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
        int qWindowSize, qWindowStep, keepFirst, removeLast, count;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
@@ -112,13 +114,15 @@ struct trimData {
        map<string, int> groupCounts;  
        map<string, string> nameMap;
     map<string, string> groupMap;
+    map<int, oligosPair> pairedBarcodes;
+    map<int, oligosPair> pairedPrimers;
     
        trimData(){}
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, 
+                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
                       vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
                       int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
-                      int minL, int maxA, int maxH, int maxL, bool fli, map<string, string> nm, map<string, int> ncount) {
+                      int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
         nameFile = nf;
@@ -148,6 +152,9 @@ struct trimData {
         sdiffs = sd;
         tdiffs = td;
         barcodes = bar;
+        pairedPrimers = ppr;
+        pairedBarcodes = pbr;
+        pairedOligos = po;
         primers = pri;      numFPrimers = primers.size();
         revPrimer = revP;   numRPrimers = revPrimer.size();
         linker = li;        numLinkers = linker.size();
@@ -172,6 +179,7 @@ struct trimData {
         maxHomoP = maxH;
         maxLength = maxL;
         flip = fli;
+        reorient = reo;
         nameMap = nm;
         count = 0;
        }
@@ -251,13 +259,31 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
             } 
                }
                
-               
-               TrimOligos trimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);
+        TrimOligos* trimOligos = NULL;
+        int numBarcodes = pDataArray->barcodes.size();
+        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); }
+        else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
+        
+        TrimOligos* rtrimOligos = NULL;
+        if (pDataArray->reorient) {
+            //create reoriented primer and barcode pairs
+            map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
+            for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
+                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
+                rpairedPrimers[it->first] = tempPair;
+            }
+            for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
+                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
+                rpairedBarcodes[it->first] = tempPair;
+            }
+            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+        }
         
                pDataArray->count = 0;
                for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                   
-                       if (pDataArray->m->control_pressed) { 
+                       if (pDataArray->m->control_pressed) {
+                delete trimOligos;
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
                 if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
@@ -287,26 +313,26 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                int primerIndex = 0;
                                
                 if(pDataArray->numLinkers != 0){
-                                       success = trimOligos.stripLinker(currSeq, currQual);
+                                       success = trimOligos->stripLinker(currSeq, currQual);
                                        if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
-                               if(pDataArray->barcodes.size() != 0){
-                                       success = trimOligos.stripBarcode(currSeq, currQual, barcodeIndex);
+                               if(numBarcodes != 0){
+                                       success = trimOligos->stripBarcode(currSeq, currQual, barcodeIndex);
                                        if(success > pDataArray->bdiffs)                {       trashCode += 'b';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
                 if(pDataArray->numSpacers != 0){
-                                       success = trimOligos.stripSpacer(currSeq, currQual);
+                                       success = trimOligos->stripSpacer(currSeq, currQual);
                                        if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
                                        else{ currentSeqsDiffs += success;  }
 
                                }
                 
                                if(pDataArray->numFPrimers != 0){
-                                       success = trimOligos.stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
+                                       success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
                                        if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
                                }
@@ -314,10 +340,42 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
                                
                                if(pDataArray->numRPrimers != 0){
-                                       success = trimOligos.stripReverse(currSeq, currQual);
+                                       success = trimOligos->stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
                 
+                if (pDataArray->reorient && (trashCode != "")) { //if you failed and want to check the reverse
+                    int thisSuccess = 0;
+                    string thisTrashCode = "";
+                    int thisCurrentSeqsDiffs = 0;
+                    
+                    int thisBarcodeIndex = 0;
+                    int thisPrimerIndex = 0;
+                    
+                    if(numBarcodes != 0){
+                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        if(thisSuccess > pDataArray->bdiffs)           {       thisTrashCode += 'b';   }
+                        else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                    }
+                    
+                    if(pDataArray->numFPrimers != 0){
+                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+                        if(thisSuccess > pDataArray->pdiffs)           {       thisTrashCode += 'f';   }
+                        else{ thisCurrentSeqsDiffs += thisSuccess;  }
+                    }
+                    
+                    if (thisCurrentSeqsDiffs > pDataArray->tdiffs)     {       thisTrashCode += 't';   }
+                    
+                    if (thisTrashCode == "") {
+                        trashCode = thisTrashCode;
+                        success = thisSuccess;
+                        currentSeqsDiffs = thisCurrentSeqsDiffs;
+                        barcodeIndex = thisBarcodeIndex;
+                        primerIndex = thisPrimerIndex;
+                    }
+                }
+
+                
                                if(pDataArray->keepFirst != 0){
                                        //success = keepFirstTrim(currSeq, currQual);
                     success = 1;
@@ -398,9 +456,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                if(trashCode.length() == 0){
                     string thisGroup = "";
                     if (pDataArray->createGroup) {
-                                               if(pDataArray->barcodes.size() != 0){
+                                               if(numBarcodes != 0){
                                                        thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
-                                                       if (pDataArray->primers.size() != 0) { 
+                                                       if (pDataArray->numFPrimers != 0) {
                                                                if (pDataArray->primerNameVector[primerIndex] != "") { 
                                                                        if(thisGroup != "") {
                                                                                thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
@@ -438,7 +496,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         }
                         
                         if (pDataArray->createGroup) {
-                            if(pDataArray->barcodes.size() != 0){
+                            if(numBarcodes != 0){
                                 
                                 if (pDataArray->countfile == "") { outGroupsFile << currSeq.getName() << '\t' << thisGroup << endl; }
                                 else {   pDataArray->groupMap[currSeq.getName()] = thisGroup; }
@@ -515,7 +573,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                //report progress
                if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                
-               
+               delete trimOligos;
                inFASTA.close();
                trimFASTAFile.close();
                scrapFASTAFile.close();