]> git.donarmstrong.com Git - mothur.git/blobdiff - trimseqscommand.h
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / trimseqscommand.h
index 4d0a6112cb6e288600ae944e9340563f5973da47..2448669816c6936f55bff8b3ad256ce15f1de474 100644 (file)
@@ -16,6 +16,7 @@
 #include "qualityscores.h"
 #include "trimoligos.h"
 #include "counttable.h"
+#include "oligos.h"
 
 
 class TrimSeqsCommand : public Command {
@@ -44,34 +45,22 @@ private:
         linePair() {}
     };
     
-       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
+    Oligos oligos;
+       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&, map<string, string>&);
        bool keepFirstTrim(Sequence&, QualityScores&);
        bool removeLastTrim(Sequence&, QualityScores&);
        bool cullLength(Sequence&);
        bool cullHomoP(Sequence&);
        bool cullAmbigs(Sequence&);
-    string reverseOligo(string);
-
        bool abort, createGroup;
        string fastaFile, oligoFile, qFileName, groupfile, nameFile, countfile, outputDir;
        
-       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
+       bool flip, allFiles, qtrim, keepforward, pairedOligos, reorient, logtransform;
+       int numBarcodes, numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, processors, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs, comboStarts;
        int qWindowSize, qWindowStep, keepFirst, removeLast;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
-       vector<string> revPrimer, outputNames;
        set<string> filesToRemove;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
-       map<string, int> barcodes;
-       vector<string> groupVector;
-       map<string, int> primers;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       map<string, int> groupToIndex;
-       vector<string> primerNameVector;        //needed here?
-       vector<string> barcodeNameVector;       //needed here?
+    vector<string> groupVector,outputNames;
        map<string, int> groupCounts;  
        map<string, string> nameMap;
     map<string, int> nameCount; //for countfile name -> repCount
@@ -93,35 +82,24 @@ private:
 struct trimData {
     unsigned long long start, end;
     MothurOut* m;
-    string filename, qFileName, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
+    string filename, qFileName, oligosfile, trimFileName, scrapFileName, trimQFileName, scrapQFileName, trimNFileName, scrapNFileName, trimCFileName, scrapCFileName, groupFileName, nameFile, countfile;
        vector<vector<string> > fastaFileNames;
     vector<vector<string> > qualFileNames;
     vector<vector<string> > nameFileNames;
     unsigned long long lineStart, lineEnd, qlineStart, qlineEnd;
-    bool flip, allFiles, qtrim, keepforward, createGroup, pairedOligos, reorient;
-       int numFPrimers, numRPrimers, numLinkers, numSpacers, maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
+    bool flip, allFiles, qtrim, keepforward, createGroup, reorient, logtransform;
+       int maxAmbig, maxHomoP, minLength, maxLength, tdiffs, bdiffs, pdiffs, ldiffs, sdiffs;
        int qWindowSize, qWindowStep, keepFirst, removeLast, count;
        double qRollAverage, qThreshold, qWindowAverage, qAverage;
-    vector<string> revPrimer;
-       map<string, int> barcodes;
-       map<string, int> primers;
     map<string, int> nameCount;
-    vector<string>  linker;
-    vector<string>  spacer;
-       map<string, int> combos;
-       vector<string> primerNameVector;        
-       vector<string> barcodeNameVector;       
        map<string, int> groupCounts;  
        map<string, string> nameMap;
     map<string, string> groupMap;
-    map<int, oligosPair> pairedBarcodes;
-    map<int, oligosPair> pairedPrimers;
     
        trimData(){}
        trimData(string fn, string qn, string nf, string cf, string tn, string sn, string tqn, string sqn, string tnn, string snn, string tcn, string scn,string gn, vector<vector<string> > ffn, vector<vector<string> > qfn, vector<vector<string> > nfn, unsigned long long lstart, unsigned long long lend, unsigned long long qstart, unsigned long long qend,  MothurOut* mout,
-                      int pd, int bd, int ld, int sd, int td, map<string, int> pri, map<string, int> bar, vector<string> revP, vector<string> li, vector<string> spa, map<int, oligosPair> pbr, map<int, oligosPair> ppr, bool po,
-                      vector<string> priNameVector, vector<string> barNameVector, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
-                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage,
+                      int pd, int bd, int ld, int sd, int td, string o, bool cGroup, bool aFiles, bool keepF, int keepfi, int removeL,
+                      int WindowStep, int WindowSize, int WindowAverage, bool trim, double Threshold, double Average, double RollAverage, bool lt,
                       int minL, int maxA, int maxH, int maxL, bool fli, bool reo, map<string, string> nm, map<string, int> ncount) {
         filename = fn;
         qFileName = qn;
@@ -145,22 +123,13 @@ struct trimData {
         qlineEnd = qend;
                m = mout;
         nameCount = ncount;
+        oligosfile = o;
         
         pdiffs = pd;
         bdiffs = bd;
         ldiffs = ld;
         sdiffs = sd;
         tdiffs = td;
-        barcodes = bar;
-        pairedPrimers = ppr;
-        pairedBarcodes = pbr;
-        pairedOligos = po;
-        primers = pri;      numFPrimers = primers.size();
-        revPrimer = revP;   numRPrimers = revPrimer.size();
-        linker = li;        numLinkers = linker.size();
-        spacer = spa;       numSpacers = spacer.size();
-        primerNameVector = priNameVector;
-        barcodeNameVector = barNameVector;
         
         createGroup = cGroup;
         allFiles = aFiles;
@@ -174,6 +143,7 @@ struct trimData {
         qThreshold = Threshold;
         qAverage = Average;
         qRollAverage = RollAverage;
+        logtransform = lt;
         minLength = minL;
         maxAmbig = maxA;
         maxHomoP = maxH;
@@ -259,31 +229,37 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
             } 
                }
                
+        int numBarcodes, numLinkers, numSpacers, numRPrimers, numFPrimers;
+        bool pairedOligos;
+        Oligos oligos(pDataArray->oligosfile);
+        if (oligos.hasPairedBarcodes()) {
+            pairedOligos = true;
+            numFPrimers = oligos.getPairedPrimers().size();
+            numBarcodes = oligos.getPairedBarcodes().size();
+        }else {
+            pairedOligos = false;
+            numFPrimers = oligos.getPrimers().size();
+            numBarcodes = oligos.getBarcodes().size();
+        }
+        
+        numLinkers = oligos.getLinkers().size();
+        numSpacers = oligos.getSpacers().size();
+        numRPrimers = oligos.getReversePrimers().size();
+        
         TrimOligos* trimOligos = NULL;
-        int numBarcodes = pDataArray->barcodes.size();
-        if (pDataArray->pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, pDataArray->pairedPrimers, pDataArray->pairedBarcodes);   numBarcodes = pDataArray->pairedBarcodes.size(); }
-        else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, pDataArray->primers, pDataArray->barcodes, pDataArray->revPrimer, pDataArray->linker, pDataArray->spacer);  }
+        if (pairedOligos)   {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, oligos.getPairedPrimers(), oligos.getPairedBarcodes());    }
+        else                {   trimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, pDataArray->ldiffs, pDataArray->sdiffs, oligos.getPrimers(), oligos.getBarcodes(), oligos.getReversePrimers(), oligos.getLinkers(), oligos.getSpacers());  }
         
         TrimOligos* rtrimOligos = NULL;
         if (pDataArray->reorient) {
-            //create reoriented primer and barcode pairs
-            map<int, oligosPair> rpairedPrimers, rpairedBarcodes;
-            for (map<int, oligosPair>::iterator it = pDataArray->pairedPrimers.begin(); it != pDataArray->pairedPrimers.end(); it++) {
-                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reversePrimer, rc ForwardPrimer
-                rpairedPrimers[it->first] = tempPair;
-            }
-            for (map<int, oligosPair>::iterator it = pDataArray->pairedBarcodes.begin(); it != pDataArray->pairedBarcodes.end(); it++) {
-                oligosPair tempPair(trimOligos->reverseOligo((it->second).reverse), (trimOligos->reverseOligo((it->second).forward))); //reverseBarcode, rc ForwardBarcode
-                rpairedBarcodes[it->first] = tempPair;
-            }
-            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0, rpairedPrimers, rpairedBarcodes); numBarcodes = rpairedBarcodes.size();
+            rtrimOligos = new TrimOligos(pDataArray->pdiffs, pDataArray->bdiffs, 0, 0,  oligos.getReorientedPairedPrimers(), oligos.getReorientedPairedBarcodes()); numBarcodes = oligos.getReorientedPairedBarcodes().size();
         }
         
                pDataArray->count = 0;
                for(int i = 0; i < pDataArray->lineEnd; i++){ //end is the number of sequences to process
                                   
                        if (pDataArray->m->control_pressed) {
-                delete trimOligos;
+                delete trimOligos; if (pDataArray->reorient) { delete rtrimOligos; }
                                inFASTA.close(); trimFASTAFile.close(); scrapFASTAFile.close();
                                if ((pDataArray->createGroup) && (pDataArray->countfile == "")) {        outGroupsFile.close();   }
                 if(pDataArray->qFileName != "")        {       qFile.close();  scrapQualFile.close(); trimQualFile.close();    }
@@ -299,11 +275,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        int currentSeqsDiffs = 0;
             
                        Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
+            Sequence savedSeq(currSeq.getName(), currSeq.getAligned());
                        
-                       QualityScores currQual;
+                       QualityScores currQual; QualityScores savedQual;
                        if(pDataArray->qFileName != ""){
                                currQual = QualityScores(qFile);  pDataArray->m->gobble(qFile);
+                savedQual.setName(currQual.getName()); savedQual.setScores(currQual.getScores());
                        }
+              
                        
                        string origSeq = currSeq.getUnaligned();
                        if (origSeq != "") {
@@ -312,7 +291,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                int barcodeIndex = 0;
                                int primerIndex = 0;
                                
-                if(pDataArray->numLinkers != 0){
+                if(numLinkers != 0){
                                        success = trimOligos->stripLinker(currSeq, currQual);
                                        if(success > pDataArray->ldiffs)                {       trashCode += 'k';       }
                                        else{ currentSeqsDiffs += success;  }
@@ -324,14 +303,14 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        else{ currentSeqsDiffs += success;  }
                                }
                 
-                if(pDataArray->numSpacers != 0){
+                if(numSpacers != 0){
                                        success = trimOligos->stripSpacer(currSeq, currQual);
                                        if(success > pDataArray->sdiffs)                {       trashCode += 's';       }
                                        else{ currentSeqsDiffs += success;  }
 
                                }
                 
-                               if(pDataArray->numFPrimers != 0){
+                               if(numFPrimers != 0){
                                        success = trimOligos->stripForward(currSeq, currQual, primerIndex, pDataArray->keepforward);
                                        if(success > pDataArray->pdiffs)                {       trashCode += 'f';       }
                                        else{ currentSeqsDiffs += success;  }
@@ -339,7 +318,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                
                                if (currentSeqsDiffs > pDataArray->tdiffs)      {       trashCode += 't';   }
                                
-                               if(pDataArray->numRPrimers != 0){
+                               if(numRPrimers != 0){
                                        success = trimOligos->stripReverse(currSeq, currQual);
                                        if(!success)                            {       trashCode += 'r';       }
                                }
@@ -353,13 +332,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                     int thisPrimerIndex = 0;
                     
                     if(numBarcodes != 0){
-                        thisSuccess = rtrimOligos->stripBarcode(currSeq, currQual, thisBarcodeIndex);
+                        thisSuccess = rtrimOligos->stripBarcode(savedSeq, savedQual, thisBarcodeIndex);
                         if(thisSuccess > pDataArray->bdiffs)           {       thisTrashCode += 'b';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
                     
-                    if(pDataArray->numFPrimers != 0){
-                        thisSuccess = rtrimOligos->stripForward(currSeq, currQual, thisPrimerIndex, pDataArray->keepforward);
+                    if(numFPrimers != 0){
+                        thisSuccess = rtrimOligos->stripForward(savedSeq, savedQual, thisPrimerIndex, pDataArray->keepforward);
                         if(thisSuccess > pDataArray->pdiffs)           {       thisTrashCode += 'f';   }
                         else{ thisCurrentSeqsDiffs += thisSuccess;  }
                     }
@@ -372,7 +351,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                         currentSeqsDiffs = thisCurrentSeqsDiffs;
                         barcodeIndex = thisBarcodeIndex;
                         primerIndex = thisPrimerIndex;
-                    }
+                        savedSeq.reverseComplement();
+                        currSeq.setAligned(savedSeq.getAligned());
+                        if(pDataArray->qFileName != ""){
+                            savedQual.flipQScores();
+                            currQual.setScores(savedQual.getScores());
+                        }
+                    }else { trashCode += "(" + thisTrashCode + ")";  }
                 }
 
                 
@@ -407,9 +392,9 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                        int origLength = currSeq.getNumBases();
                                        
                                        if(pDataArray->qThreshold != 0)                 {       success = currQual.stripQualThreshold(currSeq, pDataArray->qThreshold);                 }
-                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage);                              }
-                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage);  }
-                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage);       }
+                                       else if(pDataArray->qAverage != 0)              {       success = currQual.cullQualAverage(currSeq, pDataArray->qAverage, pDataArray->logtransform);                            }
+                                       else if(pDataArray->qRollAverage != 0)  {       success = currQual.stripQualRollingAverage(currSeq, pDataArray->qRollAverage, pDataArray->logtransform);        }
+                                       else if(pDataArray->qWindowAverage != 0){       success = currQual.stripQualWindowAverage(currSeq, pDataArray->qWindowStep, pDataArray->qWindowSize, pDataArray->qWindowAverage, pDataArray->logtransform);     }
                                        else                                            {       success = 1;                            }
                                        
                                        //you don't want to trim, if it fails above then scrap it
@@ -455,20 +440,7 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                                
                                if(trashCode.length() == 0){
                     string thisGroup = "";
-                    if (pDataArray->createGroup) {
-                                               if(numBarcodes != 0){
-                                                       thisGroup = pDataArray->barcodeNameVector[barcodeIndex];
-                                                       if (pDataArray->numFPrimers != 0) {
-                                                               if (pDataArray->primerNameVector[primerIndex] != "") { 
-                                                                       if(thisGroup != "") {
-                                                                               thisGroup += "." + pDataArray->primerNameVector[primerIndex]; 
-                                                                       }else {
-                                                                               thisGroup = pDataArray->primerNameVector[primerIndex]; 
-                                                                       }
-                                                               } 
-                                                       }
-                        }
-                    }
+                    if (pDataArray->createGroup) {    thisGroup = oligos.getGroupName(barcodeIndex, primerIndex);                     }
                     
                     int pos = thisGroup.find("ignore");
                     if (pos == string::npos) {
@@ -567,12 +539,13 @@ static DWORD WINAPI MyTrimThreadFunction(LPVOID lpParam){
                        }
                        
                        //report progress
-                       if((i) % 1000 == 0){    pDataArray->m->mothurOut(toString(i)); pDataArray->m->mothurOutEndLine();               }
+                       if((pDataArray->count) % 1000 == 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                        
                }
                //report progress
                if((pDataArray->count) % 1000 != 0){    pDataArray->m->mothurOut(toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();               }
                
+        if (pDataArray->reorient) { delete rtrimOligos; }
                delete trimOligos;
                inFASTA.close();
                trimFASTAFile.close();