]> git.donarmstrong.com Git - mothur.git/blobdiff - pcrseqscommand.h
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / pcrseqscommand.h
index 07ca8d5e8ed486af362933896725cb48c28b3df2..16c1e29be5f9cbba00180a55a746bbe923b3cbdb 100644 (file)
@@ -15,6 +15,7 @@
 #include "trimoligos.h"
 #include "alignment.hpp"
 #include "needlemanoverlap.hpp"
+#include "counttable.h"
 
 class PcrSeqsCommand : public Command {
 public:
@@ -25,7 +26,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "pcr.seqs";    }
        string getCommandCategory()             { return "Sequence Processing";         }
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
        string getDescription()         { return "pcr.seqs"; }
     
@@ -43,25 +46,23 @@ private:
     
     vector<linePair> lines;
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
-    bool abort, keepprimer;
-       string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
-       int start, end, pdiffs, processors, length;
+    bool abort, keepprimer, keepdots;
+       string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
+       int start, end, processors, length, pdiffs;
        
     vector<string> revPrimer, outputNames;
-       vector<string> primers;
+       map<string, int> primers;
     
     int writeAccnos(set<string>);
     int readName(set<string>&);
     int readGroup(set<string>);
     int readTax(set<string>);
+    int readCount(set<string>);
     bool readOligos();
     bool readEcoli();
        int driverPcr(string, string, string, set<string>&, linePair);  
        int createProcesses(string, string, string, set<string>&);
-    bool findForward(Sequence&, int&, int&);
-    bool findReverse(Sequence&, int&, int&);
     bool isAligned(string, map<int, int>&);
-    bool compareDNASeq(string, string);
     string reverseOligo(string);
 };
 
@@ -74,16 +75,16 @@ struct pcrData {
     string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
        unsigned long long fstart;
        unsigned long long fend;
-       int count, start, end, length;
+       int count, start, end, length, pdiffs;
        MothurOut* m;
-       vector<string> primers;
+       map<string, int> primers;
     vector<string> revPrimer;
     set<string> badSeqNames;
-    bool keepprimer;
+    bool keepprimer, keepdots;
        
        
        pcrData(){}
-       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
                filename = f;
         goodFasta = gf;
         badFasta = bfn;
@@ -94,11 +95,13 @@ struct pcrData {
         revPrimer = rpr;
         nomatch = nm;
         keepprimer = kp;
+        keepdots = kd;
                start = st;
                end = en;
         length = l;
                fstart = fst;
         fend = fen;
+        pdiffs = pd;
                count = 0;
        }
 };
@@ -127,9 +130,12 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                }
         
         set<int> lengths;
-               pDataArray->count = pDataArray->fend;
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
+               
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
-            
+            pDataArray->count++;
                        if (pDataArray->m->control_pressed) {  break; }
                        
                        Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
@@ -155,69 +161,29 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process primers
                     if (pDataArray->primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        //bool good = findForward(currSeq, primerStart, primerEnd);
-                        ///////////////////////////////////////////////////////////////
-                        bool good = false;
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->primers.size();j++){
-                            string oligo = pDataArray->primers[j];
-                            
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = 0; l < rawSequence.length()-olength; l++){
-                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-                        ///////////////////////////////////////////////////////////////
-                        
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
                         else{
                             //are you aligned
                             if (aligned) { 
-                                if (!pDataArray->keepprimer)    {  currSeq.padToPos(mapAligned[primerEnd]); } 
-                                else                {  currSeq.padToPos(mapAligned[primerStart]); }
+                                if (!pDataArray->keepprimer)    {  
+                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
+                                } 
+                                else                {  
+                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                }
+                                ///////////////////////////////////////////////////////////////
+                                mapAligned.clear();
+                                string seq = currSeq.getAligned(); 
+                                int countBases = 0;
+                                for (int k = 0; k < seq.length(); k++) {
+                                    if (!isalpha(seq[k])) { ; }
+                                    else { mapAligned[countBases] = k; countBases++; } 
+                                }                                                   
+                                ///////////////////////////////////////////////////////////////
                             }else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -228,67 +194,20 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process reverse primers
                     if (pDataArray->revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = false;
-                        //findReverse(currSeq, primerStart, primerEnd);
-                         ///////////////////////////////////////////////////////////////
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->revPrimer.size();j++){
-                            string oligo = pDataArray->revPrimer[j];
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = rawSequence.length()-olength; l >= 0; l--){
-                                
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-
-                         ///////////////////////////////////////////////////////////////
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
                         else{ 
                             //are you aligned
                             if (aligned) { 
-                                if (!pDataArray->keepprimer)    {  currSeq.padFromPos(mapAligned[primerStart]); } 
-                                else                {  currSeq.padFromPos(mapAligned[primerEnd]); } 
-                            }
+                                if (!pDataArray->keepprimer)    {  
+                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
+                                } 
+                                else                {  
+                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
+                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
+                                }                             }
                             else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
@@ -302,21 +221,38 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     else if (currSeq.getAligned().length() != pDataArray->length) {
                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
                     }else {
-                        currSeq.padToPos(pDataArray->start); 
-                        currSeq.padFromPos(pDataArray->end);
+                        if (pDataArray->keepdots)   { 
+                            currSeq.filterToPos(pDataArray->start); 
+                            currSeq.filterFromPos(pDataArray->end);
+                        }else {
+                            string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+                            seqString = seqString.substr(pDataArray->start);
+                            currSeq.setAligned(seqString); 
+                        }
                     }
                 }else{ //using start and end to trim
                     //make sure the seqs are aligned
                     lengths.insert(currSeq.getAligned().length());
                     if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
                     else {
-                        if (pDataArray->start != -1) { currSeq.padToPos(pDataArray->start); }
                         if (pDataArray->end != -1) {
                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
                             else {
-                                currSeq.padFromPos(pDataArray->end);
+                                if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
+                                else {
+                                    string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+                                    currSeq.setAligned(seqString); 
+                                }
                             }
                         }
+                        if (pDataArray->start != -1) { 
+                            if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
+                            else {
+                                string seqString = currSeq.getAligned().substr(pDataArray->start);
+                                currSeq.setAligned(seqString); 
+                            }
+                        }
+                        
                     }
                 }