]> git.donarmstrong.com Git - mothur.git/blobdiff - aligncommand.h
working on pam
[mothur.git] / aligncommand.h
index 952b9364dda6fe79e162e6577fb2d7c0e251c5b4..4e1b8d39f81dc067c60575d1938a21fd1de8dee0 100644 (file)
@@ -1,6 +1,5 @@
 #ifndef ALIGNCOMMAND_H
 #define ALIGNCOMMAND_H
-
 /*
  *  aligncommand.h
  *  Mothur
  *
  */
 
-#include "mothur.h"
 #include "command.hpp"
 #include "database.hpp"
 #include "alignment.hpp"
 #include "alignmentdb.h"
+#include "sequence.hpp"
+
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
 
+#include "nast.hpp"
+#include "nastreport.hpp"
+
+//test
 class AlignCommand : public Command {
        
 public:
@@ -26,7 +34,9 @@ public:
        vector<string> setParameters();
        string getCommandName()                 { return "align.seqs";                  }
        string getCommandCategory()             { return "Sequence Processing"; }
+       
        string getHelpString(); 
+    string getOutputPattern(string);   
        string getCitation() { return "DeSantis TZ, Jr., Hugenholtz P, Keller K, Brodie EL, Larsen N, Piceno YM, Phan R, Andersen GL (2006). NAST: a multiple sequence alignment server for comparative analysis of 16S rRNA genes. Nucleic Acids Res 34: W394-9.\nSchloss PD (2009). A high-throughput DNA sequence aligner for microbial ecology studies. PLoS ONE 4: e8230.\nSchloss PD (2010). The effects of alignment quality, distance calculation method, sequence filtering, and region on the analysis of 16S rRNA gene-based studies. PLoS Comput Biol 6: e1000844.\nhttp://www.mothur.org/wiki/Align.seqs http://www.mothur.org/wiki/Align.seqs"; }
        string getDescription()         { return "align sequences"; }
        
@@ -35,24 +45,22 @@ public:
        
 private:
        struct linePair {
-               unsigned long int start;
-               unsigned long int end;
-               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
+               unsigned long long start;
+               unsigned long long end;
+               linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
        bool MPIWroteAccnos;
        
        AlignmentDB* templateDB;
-       Alignment* alignment;
        
        int driver(linePair*, string, string, string, string);
        int createProcesses(string, string, string, string);
-       void appendAlignFiles(string, string); 
        void appendReportFiles(string, string);
        
        #ifdef USE_MPI
-       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
        #endif
        
        string candidateFileName, templateFileName, distanceFileName, search, align, outputDir;
@@ -61,8 +69,201 @@ private:
        vector<string> candidateFileNames;
        vector<string> outputNames;
        
-       bool abort, flip, calledHelp;
+       bool abort, flip, calledHelp, save;
 
 };
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct alignData {
+       string templateFileName;
+       string alignFName; 
+       string reportFName; 
+       string accnosFName;
+       string filename;
+       string align;
+       string search;
+       bool flip;
+       unsigned long long start;
+       unsigned long long end;
+       MothurOut* m;
+       //AlignmentDB* templateDB;
+       float match, misMatch, gapOpen, gapExtend, threshold;
+       int count, kmerSize, threadID;
+       
+       alignData(){}
+       alignData(string te, string a, string r, string ac, string f, string al, string se, int ks, MothurOut* mout, unsigned long long st, unsigned long long en, bool fl, float ma, float misMa, float gapO, float gapE, float thr, int tid) {
+               templateFileName = te;
+               alignFName = a;
+               reportFName = r;
+               accnosFName = ac;
+               filename = f;
+               flip = fl;
+               m = mout;
+               start = st;
+               end = en;
+               //templateDB = tdb;
+               match = ma; 
+               misMatch = misMa;
+               gapOpen = gapO; 
+               gapExtend = gapE; 
+               threshold = thr;
+               align = al;
+               search = se;
+               count = 0;
+               kmerSize = ks;
+               threadID = tid;
+       }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ 
+       alignData* pDataArray;
+       pDataArray = (alignData*)lpParam;
+       
+       try {
+               ofstream alignmentFile;
+               pDataArray->m->openOutputFile(pDataArray->alignFName, alignmentFile);
+               
+               ofstream accnosFile;
+               pDataArray->m->openOutputFile(pDataArray->accnosFName, accnosFile);
+               
+               NastReport report(pDataArray->reportFName);
+               
+               ifstream inFASTA;
+               pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
+               
+               //print header if you are process 0
+               if ((pDataArray->start == 0) || (pDataArray->start == 1)) {
+                       inFASTA.seekg(0);
+               }else { //this accounts for the difference in line endings. 
+                       inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); 
+               }
+               
+               AlignmentDB* templateDB = new AlignmentDB(pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->threadID);
+               
+               //moved this into driver to avoid deep copies in windows paralellized version
+               Alignment* alignment;
+               int longestBase = templateDB->getLongestBase();
+               if(pDataArray->align == "gotoh")                        {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase);                 }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);                            }
+               else if(pDataArray->align == "blast")           {       alignment = new BlastAlignment(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch);            }
+               else if(pDataArray->align == "noalign")         {       alignment = new NoAlign();                                                                                                      }
+               else {
+                       pDataArray->m->mothurOut(pDataArray->align + " is not a valid alignment option. I will run the command using needleman.");
+                       pDataArray->m->mothurOutEndLine();
+                       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);
+               }
+               
+               pDataArray->count = 0;
+               for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process
+                       
+                       if (pDataArray->m->control_pressed) {  break; }
+                       
+                       Sequence* candidateSeq = new Sequence(inFASTA);  pDataArray->m->gobble(inFASTA);
+                       report.setCandidate(candidateSeq);
+                       
+                       int origNumBases = candidateSeq->getNumBases();
+                       string originalUnaligned = candidateSeq->getUnaligned();
+                       int numBasesNeeded = origNumBases * pDataArray->threshold;
+                       
+                       if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
+                               if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
+                                       alignment->resize(candidateSeq->getUnaligned().length()+1);
+                               }
+                               
+                               Sequence temp = templateDB->findClosestSequence(candidateSeq);
+                               Sequence* templateSeq = &temp;
+                               
+                               float searchScore = templateDB->getSearchScore();
+                               
+                               Nast* nast = new Nast(alignment, candidateSeq, templateSeq);
+                               
+                               Sequence* copy;
+                               
+                               Nast* nast2;
+                               bool needToDeleteCopy = false;  //this is needed in case you have you enter the ifs below
+                               //since nast does not make a copy of hte sequence passed, and it is used by the reporter below
+                               //you can't delete the copy sequence til after you report, but you may choose not to create it in the first place
+                               //so this bool tells you if you need to delete it
+                               
+                               //if there is a possibility that this sequence should be reversed
+                               if (candidateSeq->getNumBases() < numBasesNeeded) {
+                                       
+                                       string wasBetter =  "";
+                                       //if the user wants you to try the reverse
+                                       if (pDataArray->flip) {
+                                               
+                                               //get reverse compliment
+                                               copy = new Sequence(candidateSeq->getName(), originalUnaligned);
+                                               copy->reverseComplement();
+                                               
+                                               //rerun alignment
+                                               Sequence temp2 = templateDB->findClosestSequence(copy);
+                                               Sequence* templateSeq2 = &temp2;
+                                               
+                                               searchScore = templateDB->getSearchScore();
+                                               
+                                               nast2 = new Nast(alignment, copy, templateSeq2);
+                                               
+                                               //check if any better
+                                               if (copy->getNumBases() > candidateSeq->getNumBases()) {
+                                                       candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
+                                                       templateSeq = templateSeq2; 
+                                                       delete nast;
+                                                       nast = nast2;
+                                                       needToDeleteCopy = true;
+                                                       wasBetter = "\treverse complement produced a better alignment, so mothur used the reverse complement.";
+                                               }else{  
+                                                       wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
+                                                       delete nast2;
+                                                       delete copy;    
+                                               }
+                                       }
+                                       
+                                       //create accnos file with names
+                                       accnosFile << candidateSeq->getName() << wasBetter << endl;
+                               }
+                               
+                               report.setTemplate(templateSeq);
+                               report.setSearchParameters(pDataArray->search, searchScore);
+                               report.setAlignmentParameters(pDataArray->align, alignment);
+                               report.setNastParameters(*nast);
+                               
+                               alignmentFile << '>' << candidateSeq->getName() << '\n' << candidateSeq->getAligned() << endl;
+                               
+                               report.print();
+                               delete nast;
+                               if (needToDeleteCopy) {   delete copy;   }
+                               
+                               pDataArray->count++;
+                       }
+                       delete candidateSeq;
+                       
+                       //report progress
+                       if((pDataArray->count) % 100 == 0){     pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n");         }
+                       
+               }
+               //report progress
+               if((pDataArray->count) % 100 != 0){     pDataArray->m->mothurOutJustToScreen(toString(pDataArray->count)+"\n");         }
+               
+               delete alignment;
+               delete templateDB;
+               alignmentFile.close();
+               inFASTA.close();
+               accnosFile.close();
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "AlignCommand", "MyAlignThreadFunction");
+               exit(1);
+       }
+} 
+#endif
+
+
+
 #endif