]> git.donarmstrong.com Git - mothur.git/blobdiff - pcrseqscommand.h
moved mothur's source into a folder to make grabbing just the source easier on github
[mothur.git] / pcrseqscommand.h
diff --git a/pcrseqscommand.h b/pcrseqscommand.h
deleted file mode 100644 (file)
index 420a5eb..0000000
+++ /dev/null
@@ -1,385 +0,0 @@
-#ifndef Mothur_pcrseqscommand_h
-#define Mothur_pcrseqscommand_h
-
-//
-//  pcrseqscommand.h
-//  Mothur
-//
-//  Created by Sarah Westcott on 3/14/12.
-//  Copyright (c) 2012 Schloss Lab. All rights reserved.
-//
-
-
-#include "command.hpp"
-#include "sequence.hpp"
-#include "trimoligos.h"
-#include "alignment.hpp"
-#include "needlemanoverlap.hpp"
-
-class PcrSeqsCommand : public Command {
-public:
-       PcrSeqsCommand(string);
-       PcrSeqsCommand();
-       ~PcrSeqsCommand(){}
-       
-       vector<string> setParameters();
-       string getCommandName()                 { return "pcr.seqs";    }
-       string getCommandCategory()             { return "Sequence Processing";         }
-       string getHelpString(); 
-       string getCitation() { return "http://www.mothur.org/wiki/Pcr.seqs"; }
-       string getDescription()         { return "pcr.seqs"; }
-    
-       int execute(); 
-       void help() { m->mothurOut(getHelpString()); }  
-       
-private:
-    
-    struct linePair {
-        unsigned long long start;
-        unsigned long long end;
-        linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
-        linePair() {}
-    };
-    
-    vector<linePair> lines;
-       bool getOligos(vector<vector<string> >&, vector<vector<string> >&, vector<vector<string> >&);
-    bool abort, keepprimer, keepdots;
-       string fastafile, oligosfile, taxfile, groupfile, namefile, ecolifile, outputDir, nomatch;
-       int start, end, processors, length;
-       
-    vector<string> revPrimer, outputNames;
-       vector<string> primers;
-    
-    int writeAccnos(set<string>);
-    int readName(set<string>&);
-    int readGroup(set<string>);
-    int readTax(set<string>);
-    bool readOligos();
-    bool readEcoli();
-       int driverPcr(string, string, string, set<string>&, linePair);  
-       int createProcesses(string, string, string, set<string>&);
-    bool findForward(Sequence&, int&, int&);
-    bool findReverse(Sequence&, int&, int&);
-    bool isAligned(string, map<int, int>&);
-    bool compareDNASeq(string, string);
-    string reverseOligo(string);
-};
-
-/**************************************************************************************************/
-//custom data structure for threads to use.
-// This is passed by void pointer so it can be any data type
-// that can be passed using a single void pointer (LPVOID).
-struct pcrData {
-       string filename; 
-    string goodFasta, badFasta, oligosfile, ecolifile, nomatch;
-       unsigned long long fstart;
-       unsigned long long fend;
-       int count, start, end, length;
-       MothurOut* m;
-       vector<string> primers;
-    vector<string> revPrimer;
-    set<string> badSeqNames;
-    bool keepprimer, keepdots;
-       
-       
-       pcrData(){}
-       pcrData(string f, string gf, string bfn, MothurOut* mout, string ol, string ec, vector<string> pr, vector<string> rpr, string nm, bool kp, bool kd, int st, int en, int l, unsigned long long fst, unsigned long long fen) {
-               filename = f;
-        goodFasta = gf;
-        badFasta = bfn;
-               m = mout;
-        oligosfile = ol;
-        ecolifile = ec;
-        primers = pr;
-        revPrimer = rpr;
-        nomatch = nm;
-        keepprimer = kp;
-        keepdots = kd;
-               start = st;
-               end = en;
-        length = l;
-               fstart = fst;
-        fend = fen;
-               count = 0;
-       }
-};
-/**************************************************************************************************/
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
-#else
-static DWORD WINAPI MyPcrThreadFunction(LPVOID lpParam){ 
-       pcrData* pDataArray;
-       pDataArray = (pcrData*)lpParam;
-       
-       try {
-        ofstream goodFile;
-               pDataArray->m->openOutputFile(pDataArray->goodFasta, goodFile);
-        
-        ofstream badFile;
-               pDataArray->m->openOutputFile(pDataArray->badFasta, badFile);
-               
-               ifstream inFASTA;
-               pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
-        
-               //print header if you are process 0
-               if ((pDataArray->fstart == 0) || (pDataArray->fstart == 1)) {
-                       inFASTA.seekg(0);
-               }else { //this accounts for the difference in line endings. 
-                       inFASTA.seekg(pDataArray->fstart-1); pDataArray->m->gobble(inFASTA); 
-               }
-        
-        set<int> lengths;
-               pDataArray->count = pDataArray->fend;
-               for(int i = 0; i < pDataArray->fend; i++){ //end is the number of sequences to process
-            
-                       if (pDataArray->m->control_pressed) {  break; }
-                       
-                       Sequence currSeq(inFASTA); pDataArray->m->gobble(inFASTA);
-          
-            string trashCode = "";
-                       if (currSeq.getName() != "") {
-                
-                bool goodSeq = true;
-                if (pDataArray->oligosfile != "") {
-                    map<int, int> mapAligned;
-                    //bool aligned = isAligned(currSeq.getAligned(), mapAligned);
-                    ///////////////////////////////////////////////////////////////
-                    bool aligned = false;
-                    string seq = currSeq.getAligned(); 
-                    int countBases = 0;
-                    for (int k = 0; k < seq.length(); k++) {
-                        if (!isalpha(seq[k])) { aligned = true; }
-                        else { mapAligned[countBases] = k; countBases++; } //maps location in unaligned -> location in aligned.
-                    }                                                   //ie. the 3rd base may be at spot 10 in the alignment
-                                                                        //later when we trim we want to trim from spot 10.
-                    ///////////////////////////////////////////////////////////////
-                    
-                    //process primers
-                    if (pDataArray->primers.size() != 0) {
-                        int primerStart = 0; int primerEnd = 0;
-                        //bool good = findForward(currSeq, primerStart, primerEnd);
-                        ///////////////////////////////////////////////////////////////
-                        bool good = false;
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->primers.size();j++){
-                            string oligo = pDataArray->primers[j];
-                            
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = 0; l < rawSequence.length()-olength; l++){
-                                if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-                        ///////////////////////////////////////////////////////////////
-                        
-                        
-                        if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "f";     }
-                        else{
-                            //are you aligned
-                            if (aligned) { 
-                                if (!pDataArray->keepprimer)    {  
-                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerEnd]);   }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerEnd]));                                              }
-                                } 
-                                else                {  
-                                    if (pDataArray->keepdots)   { currSeq.filterToPos(mapAligned[primerStart]);  }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(mapAligned[primerStart]));                                              }
-                                }
-                            }else { 
-                                if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(primerEnd)); } 
-                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(primerStart)); } 
-                            }
-                        }
-                    }
-                    
-                    //process reverse primers
-                    if (pDataArray->revPrimer.size() != 0) {
-                        int primerStart = 0; int primerEnd = 0;
-                        bool good = false;
-                        //findReverse(currSeq, primerStart, primerEnd);
-                         ///////////////////////////////////////////////////////////////
-                        string rawSequence = currSeq.getUnaligned();
-                        
-                        for(int j=0;j<pDataArray->revPrimer.size();j++){
-                            string oligo = pDataArray->revPrimer[j];
-                            if (pDataArray->m->control_pressed) {  primerStart = 0; primerEnd = 0; good = false; break; }
-                            if(rawSequence.length() < oligo.length()) {  break;  }
-                            
-                            //search for primer
-                            int olength = oligo.length();
-                            for (int l = rawSequence.length()-olength; l >= 0; l--){
-                                
-                                string rawChunk = rawSequence.substr(l, olength);
-                                //compareDNASeq(oligo, rawChunk)
-                                ////////////////////////////////////////////////////////
-                                bool success = 1;
-                                for(int k=0;k<olength;k++){
-                                    
-                                    if(oligo[k] != rawChunk[k]){
-                                        if(oligo[k] == 'A' || oligo[k] == 'T' || oligo[k] == 'G' || oligo[k] == 'C')   {       success = 0;    }
-                                        else if((oligo[k] == 'N' || oligo[k] == 'I') && (rawChunk[k] == 'N'))                          {       success = 0;    }
-                                        else if(oligo[k] == 'R' && (rawChunk[k] != 'A' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'Y' && (rawChunk[k] != 'C' && rawChunk[k] != 'T'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'M' && (rawChunk[k] != 'C' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'K' && (rawChunk[k] != 'T' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'W' && (rawChunk[k] != 'T' && rawChunk[k] != 'A'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'S' && (rawChunk[k] != 'C' && rawChunk[k] != 'G'))                                 {       success = 0;    }
-                                        else if(oligo[k] == 'B' && (rawChunk[k] != 'C' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'D' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'G'))   {       success = 0;    }
-                                        else if(oligo[k] == 'H' && (rawChunk[k] != 'A' && rawChunk[k] != 'T' && rawChunk[k] != 'C'))   {       success = 0;    }
-                                        else if(oligo[k] == 'V' && (rawChunk[k] != 'A' && rawChunk[k] != 'C' && rawChunk[k] != 'G'))   {       success = 0;    }                       
-                                        
-                                        if(success == 0)       {       break;   }
-                                    }
-                                    else{
-                                        success = 1;
-                                    }
-                                }
-                                
-                                ////////////////////////////////////////////////////////////////////
-                                if(success) {
-                                    primerStart = j;
-                                    primerEnd = primerStart + olength;
-                                    good = true; break;
-                                }
-                            }
-                            if (good) { break; }
-                        }      
-                        
-                        if (!good) { primerStart = 0; primerEnd = 0; }
-
-                         ///////////////////////////////////////////////////////////////
-                        if(!good){     if (pDataArray->nomatch == "reject") { goodSeq = false; } trashCode += "r";     }
-                        else{ 
-                            //are you aligned
-                            if (aligned) { 
-                                if (!pDataArray->keepprimer)    {  
-                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerStart]); }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerStart]));   }
-                                } 
-                                else                {  
-                                    if (pDataArray->keepdots)   { currSeq.filterFromPos(mapAligned[primerEnd]); }
-                                    else            { currSeq.setAligned(currSeq.getAligned().substr(0, mapAligned[primerEnd]));   }
-                                }                             }
-                            else { 
-                                if (!pDataArray->keepprimer)    { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerStart));   } 
-                                else                { currSeq.setAligned(currSeq.getUnaligned().substr(0, primerEnd));     }
-                            }
-                        }
-                    }
-                }else if (pDataArray->ecolifile != "") {
-                    //make sure the seqs are aligned
-                    lengths.insert(currSeq.getAligned().length());
-                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
-                    else if (currSeq.getAligned().length() != pDataArray->length) {
-                        pDataArray->m->mothurOut("[ERROR]: seqs are not the same length as ecoli seq. When using ecoli option your sequences must be aligned and the same length as the ecoli sequence.\n"); pDataArray->m->control_pressed = true; break; 
-                    }else {
-                        if (pDataArray->keepdots)   { 
-                            currSeq.filterToPos(pDataArray->start); 
-                            currSeq.filterFromPos(pDataArray->end);
-                        }else {
-                            string seqString = currSeq.getAligned().substr(0, pDataArray->end);
-                            seqString = seqString.substr(pDataArray->start);
-                            currSeq.setAligned(seqString); 
-                        }
-                    }
-                }else{ //using start and end to trim
-                    //make sure the seqs are aligned
-                    lengths.insert(currSeq.getAligned().length());
-                    if (lengths.size() > 1) { pDataArray->m->mothurOut("[ERROR]: seqs are not aligned. When using start and end your sequences must be aligned.\n"); pDataArray->m->control_pressed = true; break; }
-                    else {
-                        if (pDataArray->end != -1) {
-                            if (pDataArray->end > currSeq.getAligned().length()) {  pDataArray->m->mothurOut("[ERROR]: end is longer than your sequence length, aborting.\n"); pDataArray->m->control_pressed = true; break; }
-                            else {
-                                if (pDataArray->keepdots)   { currSeq.filterFromPos(pDataArray->end); }
-                                else {
-                                    string seqString = currSeq.getAligned().substr(0, pDataArray->end);
-                                    currSeq.setAligned(seqString); 
-                                }
-                            }
-                        }
-                        if (pDataArray->start != -1) { 
-                            if (pDataArray->keepdots)   {  currSeq.filterToPos(pDataArray->start);  }
-                            else {
-                                string seqString = currSeq.getAligned().substr(pDataArray->start);
-                                currSeq.setAligned(seqString); 
-                            }
-                        }
-                        
-                    }
-                }
-                
-                               if(goodSeq == 1)    {   currSeq.printSequence(goodFile);        }
-                               else {  
-                    pDataArray->badSeqNames.insert(currSeq.getName()); 
-                    currSeq.setName(currSeq.getName() + '|' + trashCode);
-                    currSeq.printSequence(badFile); 
-                }
-                       }
-                                               
-                       //report progress
-                       if((i+1) % 100 == 0){   pDataArray->m->mothurOut("Processing sequence: " + toString(i+1)); pDataArray->m->mothurOutEndLine();           }
-               }
-               //report progress
-               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOut("Thread Processing sequence: " + toString(pDataArray->count)); pDataArray->m->mothurOutEndLine();              }
-               
-               goodFile.close();
-               inFASTA.close();
-        badFile.close();
-        
-        return 0;
-               
-       }
-       catch(exception& e) {
-               pDataArray->m->errorOut(e, "PcrSeqsCommand", "MyPcrThreadFunction");
-               exit(1);
-       }
-} 
-
-#endif
-
-/**************************************************************************************************/
-
-
-
-#endif