]> git.donarmstrong.com Git - mothur.git/blobdiff - pcrseqscommand.h
working on pam
[mothur.git] / pcrseqscommand.h
index 03092bca2a70da8428e77f5599e6ab200c2b8960..ba9062437d78df8aa337bc5948b603b1acf8e351 100644 (file)
@@ -15,6 +15,7 @@
 #include "trimoligos.h"
 #include "alignment.hpp"
 #include "needlemanoverlap.hpp"
+#include "counttable.h"
 
 class PcrSeqsCommand : public Command {
 public:
@@ -25,7 +26,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "pcr.seqs";    }
        string getCommandCategory()             { return "Sequence Processing";         }
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
        string getDescription()         { return "pcr.seqs"; }
     
@@ -43,26 +46,26 @@ private:
     
     vector<linePair> lines;
        bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
-    bool abort, keepprimer, keepdots;
-       string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
-       int start, end, pdiffs, processors, length;
+    bool abort, keepprimer, keepdots, fileAligned;
+       string fastafile, oligosfile, taxfile, groupfile, namefile, countfile, ecolifile, outputDir, nomatch;
+       int start, end, processors, length, pdiffs;
        
     vector<string> revPrimer, outputNames;
-       vector<string> primers;
+       map<string, int> primers;
     
     int writeAccnos(set<string>);
     int readName(set<string>&);
     int readGroup(set<string>);
     int readTax(set<string>);
+    int readCount(set<string>);
     bool readOligos();
     bool readEcoli();
-       int driverPcr(string, string, string, set<string>&, linePair);  
+       int driverPcr(string, string, string, string, set<string>&, linePair, int&, bool&);
        int createProcesses(string, string, string, set<string>&);
-    bool findForward(Sequence&, int&, int&);
-    bool findReverse(Sequence&, int&, int&);
     bool isAligned(string, map<int, int>&);
-    bool compareDNASeq(string, string);
     string reverseOligo(string);
+    int adjustDots(string, string, int, int);
+    
 };
 
 /**************************************************************************************************/
@@ -71,19 +74,19 @@ private:
 // that can be passed using a single void pointer (LPVOID).
 struct pcrData {
        string filename; 
-    string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
+    string goodFasta, badFasta, oligosfile, ecolifile, nomatch, locationsName;
        unsigned long long fstart;
        unsigned long long fend;
-       int count, start, end, length;
+       int count, start, end, length, pdiffs, pstart, pend;
        MothurOut* m;
-       vector<string> primers;
+       map<string, int> primers;
     vector<string> revPrimer;
     set<string> badSeqNames;
-    bool keepprimer, keepdots;
+    bool keepprimer, keepdots, fileAligned, adjustNeeded;
        
        
        pcrData(){}
-       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
+       pcrData(string f, string gf, string bfn, string loc, MothurOut* mout, string ol, string ec, map<string, int> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, int pd, unsigned long long fst, unsigned long long fen) {
                filename = f;
         goodFasta = gf;
         badFasta = bfn;
@@ -95,12 +98,18 @@ struct pcrData {
         nomatch = nm;
         keepprimer = kp;
         keepdots = kd;
+        end = en;
                start = st;
-               end = en;
         length = l;
                fstart = fst;
         fend = fen;
+        pdiffs = pd;
+        locationsName = loc;
                count = 0;
+        fileAligned = true;
+        adjustNeeded = false;
+        pstart = -1;
+        pend = -1;
        }
 };
 /**************************************************************************************************/
@@ -116,6 +125,9 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
         
         ofstream badFile;
                pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
+        
+        ofstream locationsFile;
+               pDataArray->m->openOutputFile(pDataArray->locationsName, locationsFile);
                
                ifstream inFASTA;
                pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
@@ -128,14 +140,27 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                }
         
         set<int> lengths;
-               pDataArray->count = pDataArray->fend;
+        //pdiffs, bdiffs, primers, barcodes, revPrimers
+        map<string, int> faked;
+        set<int> locations; //locations = beginning locations
+        
+        TrimOligos trim(pDataArray->pdiffs, 0, pDataArray->primers, faked, pDataArray->revPrimer);
+               
                for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
-            
+            pDataArray->count++;
                        if (pDataArray->m->control_pressed) {  break; }
                        
                        Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
-          
+            
+            if (pDataArray->fileAligned) { //assume aligned until proven otherwise
+                lengths.insert(currSeq.getAligned().length());
+                if (lengths.size() > 1) { pDataArray->fileAligned = false; }
+            }
+            
             string trashCode = "";
+            string locationsString = "";
+            int thisPStart = -1;
+            int thisPEnd = -1;
                        if (currSeq.getName() != "") {
                 
                 bool goodSeq = true;
@@ -156,75 +181,41 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process primers
                     if (pDataArray->primers.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        //bool good = findForward(currSeq, primerStart, primerEnd);
-                        ///////////////////////////////////////////////////////////////
-                        bool good = false;
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->primers.size();j++){
-                            string oligo = pDataArray->primers[j];
-                            
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = 0; l < rawSequence.length()-olength; l++){
-                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-                        ///////////////////////////////////////////////////////////////
-                        
+                        bool good = trim.findForward(currSeq, primerStart, primerEnd);
                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
                         else{
                             //are you aligned
                             if (aligned) { 
                                 if (!pDataArray->keepprimer)    {  
-                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
+                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd-1]+1);   }
+                                    else            {
+                                        currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd-1]+1));
+                                        if (pDataArray->fileAligned) {
+                                            thisPStart = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
+                                            locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+                                        }
+}
                                 } 
                                 else                {  
                                     if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
+                                    else            {
+                                        currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));
+                                        if (pDataArray->fileAligned) {
+                                            thisPStart = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
+                                            locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+                                        }
+                                    }
                                 }
+                                ///////////////////////////////////////////////////////////////
+                                mapAligned.clear();
+                                string seq = currSeq.getAligned(); 
+                                int countBases = 0;
+                                for (int k = 0; k < seq.length(); k++) {
+                                    if (!isalpha(seq[k])) { ; }
+                                    else { mapAligned[countBases] = k; countBases++; } 
+                                }                                                   
+                                ///////////////////////////////////////////////////////////////
                             }else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
                                 else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
@@ -235,71 +226,33 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     //process reverse primers
                     if (pDataArray->revPrimer.size() != 0) {
                         int primerStart = 0; int primerEnd = 0;
-                        bool good = false;
-                        //findReverse(currSeq, primerStart, primerEnd);
-                         ///////////////////////////////////////////////////////////////
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->revPrimer.size();j++){
-                            string oligo = pDataArray->revPrimer[j];
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = rawSequence.length()-olength; l >= 0; l--){
-                                
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-
-                         ///////////////////////////////////////////////////////////////
+                        bool good = trim.findReverse(currSeq, primerStart, primerEnd);
+                         
                         if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
                         else{ 
                             //are you aligned
                             if (aligned) { 
                                 if (!pDataArray->keepprimer)    {  
                                     if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
+                                    else            {
+                                        currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));
+                                        if (pDataArray->fileAligned) {
+                                            thisPEnd = mapAligned[primerStart]; //locations.insert(mapAligned[primerStart]);
+                                            locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerStart]) + "\n";
+                                        }
+
+                                    }
                                 } 
                                 else                {  
-                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
+                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd-1]+1); }
+                                    else            {
+                                        currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd-1]+1));
+                                        if (pDataArray->fileAligned) {
+                                            thisPEnd = mapAligned[primerEnd-1]+1; //locations.insert(mapAligned[primerEnd-1]+1);
+                                            locationsString += currSeq.getName() + "\t" + toString(mapAligned[primerEnd-1]+1) + "\n";
+                                        }
+
+                                    }
                                 }                             }
                             else { 
                                 if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
@@ -309,39 +262,37 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     }
                 }else if (pDataArray->ecolifile != "") {
                     //make sure the seqs are aligned
-                    lengths.insert(currSeq.getAligned().length());
-                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
+                    if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
                     else if (currSeq.getAligned().length() != pDataArray->length) {
                         pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
                     }else {
                         if (pDataArray->keepdots)   { 
-                            currSeq.filterToPos(start); 
-                            currSeq.filterFromPos(end);
+                            currSeq.filterToPos(pDataArray->start); 
+                            currSeq.filterFromPos(pDataArray->end);
                         }else {
-                            string seqString = currSeq.getAligned().substr(0, end);
-                            seqString = seqString.substr(start);
+                            string seqString = currSeq.getAligned().substr(0, pDataArray->end);
+                            seqString = seqString.substr(pDataArray->start);
                             currSeq.setAligned(seqString); 
                         }
                     }
                 }else{ //using start and end to trim
                     //make sure the seqs are aligned
-                    lengths.insert(currSeq.getAligned().length());
-                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
+                    if (!pDataArray->fileAligned) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
                     else {
                         if (pDataArray->end != -1) {
                             if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
                             else {
-                                if (pDataArray->keepdots)   { currSeq.filterFromPos(end); }
+                                if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
                                 else {
-                                    string seqString = currSeq.getAligned().substr(0, end);
+                                    string seqString = currSeq.getAligned().substr(0, pDataArray->end);
                                     currSeq.setAligned(seqString); 
                                 }
                             }
                         }
                         if (pDataArray->start != -1) { 
-                            if (pDataArray->keepdots)   {  currSeq.filterToPos(start);  }
+                            if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
                             else {
-                                string seqString = currSeq.getAligned().substr(start);
+                                string seqString = currSeq.getAligned().substr(pDataArray->start);
                                 currSeq.setAligned(seqString); 
                             }
                         }
@@ -349,7 +300,11 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                     }
                 }
                 
-                               if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
+                               if(goodSeq == 1)    {
+                    currSeq.printSequence(goodFile);
+                    if (locationsString != "") { locationsFile << locationsString; }
+                    if (thisPStart != -1)   { locations.insert(thisPStart);  }
+                }
                                else {  
                     pDataArray->badSeqNames.insert(currSeq.getName()); 
                     currSeq.setName(currSeq.getName() + '|' + trashCode);
@@ -358,17 +313,24 @@ static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){
                        }
                                                
                        //report progress
-                       if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
+                       if((i+1) % 100 == 0){   pDataArray->m->mothurOutJustToScreen("Processing sequence: " + toString(i+1)+"\n");     }
                }
                //report progress
-               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();              }
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen("Thread Processing sequence: " + toString(pDataArray->count)+"\n");                }
                
                goodFile.close();
                inFASTA.close();
         badFile.close();
+        locationsFile.close();
+        
+        if (pDataArray->m->debug) { pDataArray->m->mothurOut("[DEBUG]: fileAligned = " + toString(pDataArray->fileAligned) +'\n'); }
+        
+        if (pDataArray->fileAligned && !pDataArray->keepdots) { //print out smallest start value and largest end value
+            if (locations.size() > 1) { pDataArray->adjustNeeded = true; }
+            if (pDataArray->primers.size() != 0)    {   set<int>::iterator it = locations.begin();  pDataArray->pstart = *it;  }
+        }
         
         return 0;
-               
        }
        catch(exception& e) {
                pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");