]> git.donarmstrong.com Git - mothur.git/commitdiff
added check to cluster.classic to make sure file type is phylip. added mapping funct...
authorSarah Westcott <mothur.westcott@gmail.com>
Fri, 18 May 2012 16:35:51 +0000 (12:35 -0400)
committerSarah Westcott <mothur.westcott@gmail.com>
Fri, 18 May 2012 16:35:51 +0000 (12:35 -0400)
alignment.cpp
alignment.hpp
clusterclassic.cpp
commandfactory.cpp
makecontigscommand.cpp
makecontigscommand.h

index 03bf9bab62d34b657f1b4cb0dc076171e6d8d9d6..0dec2503f3e8db9284158eb2238ce555d155eb93 100644 (file)
@@ -53,7 +53,8 @@ void Alignment::resize(int A) {
 
 void Alignment::traceBack(){                   //      This traceback routine is used by the dynamic programming algorithms
        try {   
-                                                                       //      to fill the values of seqAaln and seqBaln
+               BBaseMap.clear();
+        ABaseMap.clear(); //   to fill the values of seqAaln and seqBaln
                seqAaln = "";
                seqBaln = "";
                int row = lB-1;
@@ -65,31 +66,50 @@ void Alignment::traceBack(){                        //      This traceback routine is used by the dynamic
                //      matrix
                
                if(currentCell.prevCell == 'x'){        seqAaln = seqBaln = "NOALIGNMENT";              }//If there's an 'x' in the bottom-
-               else{                                                                                           //      right corner bail out because it means nothing got aligned
+               else{   //      right corner bail out because it means nothing got aligned
+            int count = 0;
                        while(currentCell.prevCell != 'x'){                             //      while the previous cell isn't an 'x', keep going...
                                
                                if(currentCell.prevCell == 'u'){                        //      if the pointer to the previous cell is 'u', go up in the
                                        seqAaln = '-' + seqAaln;                                //      matrix.  this indicates that we need to insert a gap in
                                        seqBaln = seqB[row] + seqBaln;                  //      seqA and a base in seqB
+                    BBaseMap[row] = count;
                                        currentCell = alignment[--row][column];
                                }
                                else if(currentCell.prevCell == 'l'){           //      if the pointer to the previous cell is 'l', go to the left
                                        seqBaln = '-' + seqBaln;                                //      in the matrix.  this indicates that we need to insert a gap
                                        seqAaln = seqA[column] + seqAaln;               //      in seqB and a base in seqA
+                    ABaseMap[column] = count;
                                        currentCell = alignment[row][--column];
                                }
                                else{
                                        seqAaln = seqA[column] + seqAaln;               //      otherwise we need to go diagonally up and to the left,
                                        seqBaln = seqB[row] + seqBaln;                  //      here we add a base to both alignments
+                    BBaseMap[row] = count;
+                    ABaseMap[column] = count;
                                        currentCell = alignment[--row][--column];
                                }
+                count++;
                        }
                }
                
-               pairwiseLength = seqAaln.length();
+       
+        pairwiseLength = seqAaln.length();
                seqAstart = 1;  seqAend = 0;
                seqBstart = 1;  seqBend = 0;
-               
+        //flip maps since we now know the total length
+        map<int, int> newAMap;
+        for (map<int, int>::iterator it = ABaseMap.begin(); it != ABaseMap.end(); it++) {
+            int spot = it->second;
+            newAMap[pairwiseLength-spot-1] = it->first-1;
+        }
+        ABaseMap = newAMap;
+        map<int, int> newBMap;
+        for (map<int, int>::iterator it = BBaseMap.begin(); it != BBaseMap.end(); it++) {
+            int spot = it->second;
+            newBMap[pairwiseLength-spot-1] = it->first-1;
+        }
+               BBaseMap = newBMap;
                for(int i=0;i<seqAaln.length();i++){
                        if(seqAaln[i] != '-' && seqBaln[i] == '-')              {       seqAstart++;    }
                        else if(seqAaln[i] == '-' && seqBaln[i] != '-') {       seqBstart++;    }
@@ -156,7 +176,16 @@ int Alignment::getCandidateEndPos(){
 int Alignment::getTemplateStartPos(){
        return seqBstart;                                                                       //      this is called to report the quality of the alignment
 }
+/**************************************************************************************************/
+
+map<int, int> Alignment::getSeqAAlnBaseMap(){
+       return ABaseMap;                                                                        
+}
+/**************************************************************************************************/
 
+map<int, int> Alignment::getSeqBAlnBaseMap(){
+       return BBaseMap;                                                                        
+}
 /**************************************************************************************************/
 
 int Alignment::getTemplateEndPos(){
index 7a943fd0037430991a7baab418474e78a496d403..9d2197f5eaa7248b9bb8490c87c804f85931d095 100644 (file)
@@ -30,6 +30,8 @@ public:
 //     float getAlignmentScore();
        string getSeqAAln();
        string getSeqBAln();
+    map<int, int> getSeqAAlnBaseMap();
+    map<int, int> getSeqBAlnBaseMap();
        int getCandidateStartPos();
        int getCandidateEndPos();
        int getTemplateStartPos();
@@ -49,6 +51,8 @@ protected:
        int pairwiseLength;
        int nRows, nCols, lA, lB;
        vector<vector<AlignmentCell> > alignment;
+    map<int, int> ABaseMap;
+    map<int, int> BBaseMap;
        MothurOut* m;
 };
 
index f0bae59ab33de8c2f451b80c7180a33c49c28a54..2d1b9a6b781960a824fddafc03b904e8039179e1 100644 (file)
@@ -40,7 +40,12 @@ int ClusterClassic::readPhylipFile(string filename, NameAssignment* nameMap) {
                ifstream fileHandle;
                m->openInputFile(filename, fileHandle);
                
-               fileHandle >> nseqs >> name;
+        string numTest;
+               fileHandle >> numTest >> name;
+        
+        if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+        else { convert(numTest, nseqs); }
+
 
                matrixNames.push_back(name);
 
index e73d60b3683acb5b8b3d25ea583be045354adce7..1241a04e998537fe33446a1c9ba3d933e690e557 100644 (file)
 #include "makebiomcommand.h"
 #include "getcoremicrobiomecommand.h"
 #include "listotulabelscommand.h"
+#include "makecontigscommand.h"
 
 /*******************************************************/
 
@@ -287,6 +288,7 @@ CommandFactory::CommandFactory(){
     commands["make.biom"]           = "make.biom";
     commands["get.coremicrobiome"]  = "get.coremicrobiome";
     commands["list.otulabels"]      = "list.otulabels";
+    commands["make.contigs"]        = "make.contigs";
        commands["quit"]                                = "MPIEnabled"; 
 
 }
@@ -499,6 +501,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){
         else if(commandName == "make.biom")             {      command = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      command = new GetCoreMicroBiomeCommand(optionString);       }
         else if(commandName == "list.otulabels")        {      command = new ListOtuLabelsCommand(optionString);           }
+        else if(commandName == "make.contigs")          {      command = new MakeContigsCommand(optionString);             }
                else                                                                                    {       command = new NoCommand(optionString);                                          }
 
                return command;
@@ -652,6 +655,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str
         else if(commandName == "make.biom")             {      pipecommand = new MakeBiomCommand(optionString);                }
         else if(commandName == "get.coremicrobiome")    {      pipecommand = new GetCoreMicroBiomeCommand(optionString);       }
         else if(commandName == "list.otulabels")        {      pipecommand = new ListOtuLabelsCommand(optionString);           }
+        else if(commandName == "make.contigs")          {      pipecommand = new MakeContigsCommand(optionString);             }
                else                                                                                    {       pipecommand = new NoCommand(optionString);                                              }
 
                return pipecommand;
@@ -791,6 +795,7 @@ Command* CommandFactory::getCommand(string commandName){
         else if(commandName == "make.biom")             {      shellcommand = new MakeBiomCommand();               }
         else if(commandName == "get.coremicrobiome")    {      shellcommand = new GetCoreMicroBiomeCommand();      }
         else if(commandName == "list.otulabels")        {      shellcommand = new ListOtuLabelsCommand();          }
+        else if(commandName == "make.contigs")          {      shellcommand = new MakeContigsCommand();            }
                else                                                                                    {       shellcommand = new NoCommand();                                         }
 
                return shellcommand;
index 697521c16abc44dbeee019590f66fb4d03032c64..560b39cdd6adfaec6fab5bafc8e30175038c0b9c 100644 (file)
@@ -13,7 +13,7 @@ vector<string> MakeContigsCommand::setParameters(){
        try {
                CommandParameter pfasta("ffastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
         CommandParameter prfasta("rfastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(prfasta);
-               CommandParameter palign("align", "Multiple", "needleman-gotoh-blast-noalign", "needleman", "", "", "",false,false); parameters.push_back(palign);
+               CommandParameter palign("align", "Multiple", "needleman-gotoh", "needleman", "", "", "",false,false); parameters.push_back(palign);
                CommandParameter pmatch("match", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pmatch);
                CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
                CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
@@ -63,6 +63,7 @@ MakeContigsCommand::MakeContigsCommand(){
                vector<string> tempOutNames;
                outputTypes["fasta"] = tempOutNames;
                outputTypes["qfile"] = tempOutNames;
+        outputTypes["mismatch"] = tempOutNames;
        }
        catch(exception& e) {
                m->errorOut(e, "MakeContigsCommand", "MakeContigsCommand");
@@ -96,6 +97,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        vector<string> tempOutNames;
                        outputTypes["fasta"] = tempOutNames;
                        outputTypes["qfile"] = tempOutNames;
+            outputTypes["mismatch"] = tempOutNames;
                        
             
                        //if the user changes the input directory command factory will send this info to us in the output parameter 
@@ -155,7 +157,7 @@ MakeContigsCommand::MakeContigsCommand(string option)  {
                        m->mothurConvert(temp, processors);
                        
                        align = validParameter.validFile(parameters, "align", false);           if (align == "not found"){      align = "needleman";    }
-                       if ((align != "needleman") && (align != "blast") && (align != "gotoh") && (align != "noalign")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman, blast, gotoh and noalign. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
+                       if ((align != "needleman") && (align != "gotoh")) { m->mothurOut(align + " is not a valid alignment method. Options are needleman or gotoh. I will use needleman."); m->mothurOutEndLine(); align = "needleman"; }
         }
                
        }
@@ -173,12 +175,31 @@ int MakeContigsCommand::execute(){
         //this function will create a forward and reverse, fasta and qual files for each processor.
         //files has an entry for each processor. files[i][0] = forwardFasta, files[i][1] = forwardQual, files[i][2] = reverseFasta, files[i][3] = reverseQual
         int numReads = 0;
-        m->mothurOut("Reading fastq data..."); cout.flush();
+        int start = time(NULL);
+        longestBase = 1000;
+        m->mothurOut("Reading fastq data...\n"); 
         vector< vector<string> > files = readFastqFiles(numReads);  
         m->mothurOut("Done.\n");
+    
+        if (m->control_pressed) { return 0; }
         
+        string outFastaFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.fasta";
+        string outQualFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.qual";
+        string outMisMatchFile = outputDir + m->getRootName(m->getSimpleName(ffastqfile)) + "contigs.mismatches";
+        outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+        outputNames.push_back(outQualFile); outputTypes["qfile"].push_back(outQualFile);
+        outputNames.push_back(outMisMatchFile); outputTypes["mismatch"].push_back(outMisMatchFile);
         
-       
+        m->mothurOut("Making contigs...\n"); 
+        createProcesses(files, outFastaFile, outQualFile, outMisMatchFile);
+        m->mothurOut("Done.\n");
+        
+        //remove temp fasta and qual files
+        for (int i = 0; i < processors; i++) { for(int j = 0; j < files[i].size(); j++) { m->mothurRemove(files[i][j]); }  }
+        
+        if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); }  return 0; }
+        
+        m->mothurOut("It took " + toString(time(NULL) - start) + " secs to process " + toString(numReads) + " sequences.\n");
         
         string currentFasta = "";
                itTypes = outputTypes.find("fasta");
@@ -207,6 +228,225 @@ int MakeContigsCommand::execute(){
        }
 }
 //**********************************************************************************************************************
+int MakeContigsCommand::createProcesses(vector< vector<string> > files, string outputFasta, string outputQual, string outputMisMatches) {
+       try {
+               int num = 0;
+               vector<int> processIDS;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+               int process = 0;
+               
+               //loop through and create all the processes you want
+               while (process != processors-1) {
+                       int pid = fork();
+                       
+                       if (pid > 0) {
+                               processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                               process++;
+                       }else if (pid == 0){
+                               num = driver(files[process], outputFasta + toString(getpid()) + ".temp", outputQual + toString(getpid()) + ".temp", outputMisMatches + toString(getpid()) + ".temp");
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = outputFasta + toString(getpid()) + ".num.temp";
+                               m->openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
+                               exit(0);
+                       }else { 
+                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
+                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                               exit(0);
+                       }
+               }
+               
+               //do my part
+               num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);
+               
+               //force parent to wait until all the processes are done
+               for (int i=0;i<processIDS.size();i++) { 
+                       int temp = processIDS[i];
+                       wait(&temp);
+               }
+        
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  outputFasta + toString(processIDS[i]) + ".num.temp";
+                       m->openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); m->mothurRemove(tempFile);
+        }
+    #else
+        
+        //////////////////////////////////////////////////////////////////////////////////////////////////////
+               //Windows version shared memory, so be careful when passing variables through the contigsData struct. 
+               //Above fork() will clone, so memory is separate, but that's not the case with windows, 
+               //////////////////////////////////////////////////////////////////////////////////////////////////////
+               
+               vector<contigsData*> pDataArray; 
+               DWORD   dwThreadIdArray[processors-1];
+               HANDLE  hThreadArray[processors-1]; 
+               
+               //Create processor worker threads.
+               for( int i=0; i<processors-1; i++ ){
+                       string extension = toString(i) + ".temp";
+                       
+                       contigsData* tempcontig = new contigsData(files[i], (outputFasta + extension), (outputQual + extension), (outputMisMatches + extension), align, m, match, misMatch, gapOpen, gapExtend, i);
+                       pDataArray.push_back(tempcontig);
+                       processIDS.push_back(i);
+            
+                       hThreadArray[i] = CreateThread(NULL, 0, MyContigsThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
+               }
+               
+        num = driver(files[processors-1], outputFasta, outputQual, outputMisMatches);          
+        
+               //Wait until all threads have terminated.
+               WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+               
+               //Close all thread handles and free memory allocations.
+               for(int i=0; i < pDataArray.size(); i++){
+                       num += pDataArray[i]->count;
+            CloseHandle(hThreadArray[i]);
+                       delete pDataArray[i];
+        }
+                               
+    #endif     
+        
+        for (int i = 0; i < processIDS.size(); i++) {
+                       m->appendFiles((outputFasta + toString(processIDS[i]) + ".temp"), outputFasta);
+                       m->mothurRemove((outputFasta + toString(processIDS[i]) + ".temp"));
+                       
+                       m->appendFiles((outputQual + toString(processIDS[i]) + ".temp"), outputQual);
+                       m->mothurRemove((outputQual + toString(processIDS[i]) + ".temp"));
+            
+            m->appendFiles((outputMisMatches + toString(processIDS[i]) + ".temp"), outputMisMatches);
+                       m->mothurRemove((outputMisMatches + toString(processIDS[i]) + ".temp"));
+               }
+               
+               return num;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "createProcesses");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
+int MakeContigsCommand::driver(vector<string> files, string outputFasta, string outputQual, string outputMisMatches){
+    try {
+        
+        Alignment* alignment;
+        if(align == "gotoh")                   {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
+               else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
+        
+        int num = 0;
+        string thisffastafile = files[0];
+        string thisfqualfile = files[1];
+        string thisrfastafile = files[2];
+        string thisrqualfile = files[3];
+        
+        if (m->debug) {  m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+        
+        ifstream inFFasta, inRFasta, inFQual, inRQual;
+        m->openInputFile(thisffastafile, inFFasta);
+        m->openInputFile(thisfqualfile, inFQual);
+        m->openInputFile(thisrfastafile, inRFasta);
+        m->openInputFile(thisrqualfile, inRQual);
+        
+        ofstream outFasta, outQual, outMisMatch;
+        m->openOutputFile(outputFasta, outFasta);
+        m->openOutputFile(outputQual, outQual);
+        m->openOutputFile(outputMisMatches, outMisMatch);
+        outMisMatch << "Name\tLength\tMisMatches\n";
+        
+        while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+            
+            if (m->control_pressed) { break; }
+            
+            //read seqs and quality info
+            Sequence fSeq(inFFasta); m->gobble(inFFasta);
+            Sequence rSeq(inRFasta); m->gobble(inRFasta);
+            QualityScores fQual(inFQual); m->gobble(inFQual);
+            QualityScores rQual(inRQual); m->gobble(inRQual);
+            
+            //flip the reverse reads
+            rSeq.reverseComplement();
+            rQual.flipQScores();
+            
+            //pairwise align
+            alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+            map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+            map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+            fSeq.setAligned(alignment->getSeqAAln());
+            rSeq.setAligned(alignment->getSeqBAln());
+            int length = fSeq.getAligned().length();
+        
+            //traverse alignments merging into one contiguous seq
+            string contig = "";
+            vector<int> contigScores; 
+            int numMismatches = 0;
+            string seq1 = fSeq.getAligned();
+            string seq2 = rSeq.getAligned();
+           
+            vector<int> scores1 = fQual.getQualityScores();
+            vector<int> scores2 = rQual.getQualityScores();
+            
+            for (int i = 0; i < length; i++) {
+                if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+                    char c = seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
+                    contig += c;
+                    numMismatches++;
+                }else { //should never get here
+                    m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+                }
+            }
+            
+            //output
+            outFasta << ">" << fSeq.getName() << endl << contig << endl;
+            outQual << ">" << fSeq.getName() << endl;
+            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+            outQual << endl;
+            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            
+            num++;
+            
+                       //report progress
+                       if((num) % 1000 == 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
+               }
+        
+               //report progress
+               if((num) % 1000 != 0){  m->mothurOut(toString(num)); m->mothurOutEndLine();             }
+        
+        inFFasta.close();
+        inFQual.close();
+        inRFasta.close();
+        inRQual.close();
+        outFasta.close();
+        outQual.close();
+        outMisMatch.close();
+        delete alignment;
+        
+        if (m->control_pressed) { m->mothurRemove(outputQual); m->mothurRemove(outputFasta);  m->mothurRemove(outputMisMatches);}
+        
+        return num;
+    }
+       catch(exception& e) {
+               m->errorOut(e, "MakeContigsCommand", "driver");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
 vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
     try {
         vector< vector<string> > files;
@@ -287,7 +527,7 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
                //report progress
                if((count) % 10000 != 0){       m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                
-
+        
         
         //close files, delete ofstreams
         for (it = tempfiles.begin(); it!=tempfiles.end(); it++) { for (int i = 0; i < (it->second).size(); i++) { (*(it->second)[i]).close();  delete (it->second)[i]; } }
@@ -308,7 +548,6 @@ vector< vector<string> > MakeContigsCommand::readFastqFiles(int& count){
         exit(1);
     }
 }
-
 //**********************************************************************************************************************
 fastqRead MakeContigsCommand::readFastq(ifstream& in){
     try {
index e63695e77f401b0411df0e9405c1ce99fbd44b98..779d35cb284449293a53b60d8eed83755d08ffc2 100644 (file)
 //
 
 #include "command.hpp"
+#include "sequence.hpp"
+#include "qualityscores.h"
+#include "alignment.hpp"
+#include "gotohoverlap.hpp"
+#include "needlemanoverlap.hpp"
+#include "blastalign.hpp"
+#include "noalign.hpp"
+
 
 struct fastqRead {
        vector<int> scores;
@@ -50,14 +58,166 @@ private:
     fastqRead readFastq(ifstream&);
     vector< vector<string> > readFastqFiles(int&);
     bool checkReads(fastqRead&, fastqRead&);
+    int createProcesses(vector< vector<string> >, string, string, string);
+    int driver(vector<string>, string, string, string);
 };
 
 /**************************************************************************************************/
 
+/**************************************************************************************************/
+//custom data structure for threads to use.
+// This is passed by void pointer so it can be any data type
+// that can be passed using a single void pointer (LPVOID).
+struct contigsData {
+       string outputFasta; 
+       string outputQual; 
+       string outputMisMatches;
+       string align;
+    vector<string> files;
+       MothurOut* m;
+       float match, misMatch, gapOpen, gapExtend;
+       int count, threadID;
+       
+       contigsData(){}
+       contigsData(vector<string> f, string of, string oq, string om, string al, MothurOut* mout, float ma, float misMa, float gapO, float gapE, int tid) {
+        files = f;
+               outputFasta = of;
+        outputQual = oq;
+        outputMisMatches = om;
+        m = mout;
+               match = ma; 
+               misMatch = misMa;
+               gapOpen = gapO; 
+               gapExtend = gapE; 
+               align = al;
+               count = 0;
+               threadID = tid;
+       }
+};
 
-
-
-
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyContigsThreadFunction(LPVOID lpParam){ 
+       contigsData* pDataArray;
+       pDataArray = (contigsData*)lpParam;
+       
+       try {
+        int longestBase = 1000;
+        Alignment* alignment;
+        if(pDataArray->align == "gotoh")                       {       alignment = new GotohOverlap(pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, longestBase);                 }
+               else if(pDataArray->align == "needleman")       {       alignment = new NeedlemanOverlap(pDataArray->gapOpen, pDataArray->match, pDataArray->misMatch, longestBase);                            }
+        
+        int num = 0;
+        string thisffastafile = pDataArray->files[0];
+        string thisfqualfile = pDataArray->files[1];
+        string thisrfastafile = pDataArray->files[2];
+        string thisrqualfile = pDataArray->files[3];
+        
+        if (pDataArray->m->debug) {  pDataArray->m->mothurOut("[DEBUG]: ffasta = " + thisffastafile + ".\n[DEBUG]: fqual = " + thisfqualfile + ".\n[DEBUG]: rfasta = " + thisrfastafile + ".\n[DEBUG]: rqual = " + thisrqualfile + ".\n"); }
+        
+        ifstream inFFasta, inRFasta, inFQual, inRQual;
+        pDataArray->m->openInputFile(thisffastafile, inFFasta);
+        pDataArray->m->openInputFile(thisfqualfile, inFQual);
+        pDataArray->m->openInputFile(thisrfastafile, inRFasta);
+        pDataArray->m->openInputFile(thisrqualfile, inRQual);
+        
+        ofstream outFasta, outQual, outMisMatch;
+        pDataArray->m->openOutputFile(pDataArray->outputFasta, outFasta);
+        pDataArray->m->openOutputFile(pDataArray->outputQual, outQual);
+        pDataArray->m->openOutputFile(pDataArray->outputMisMatches, outMisMatch);
+        outMisMatch << "Name\tLength\tMisMatches\n";
+        
+        while ((!inFQual.eof()) && (!inFFasta.eof()) && (!inRFasta.eof()) && (!inRQual.eof())) {
+            
+            if (pDataArray->m->control_pressed) { break; }
+            
+            //read seqs and quality info
+            Sequence fSeq(inFFasta); pDataArray->m->gobble(inFFasta);
+            Sequence rSeq(inRFasta); pDataArray->m->gobble(inRFasta);
+            QualityScores fQual(inFQual); pDataArray->m->gobble(inFQual);
+            QualityScores rQual(inRQual); pDataArray->m->gobble(inRQual);
+            
+            //flip the reverse reads
+            rSeq.reverseComplement();
+            rQual.flipQScores();
+            
+            //pairwise align
+            alignment->align(fSeq.getUnaligned(), rSeq.getUnaligned());
+            map<int, int> ABaseMap = alignment->getSeqAAlnBaseMap();
+            map<int, int> BBaseMap = alignment->getSeqBAlnBaseMap();
+            fSeq.setAligned(alignment->getSeqAAln());
+            rSeq.setAligned(alignment->getSeqBAln());
+            int length = fSeq.getAligned().length();
+            
+            //traverse alignments merging into one contiguous seq
+            string contig = "";
+            vector<int> contigScores; 
+            int numMismatches = 0;
+            string seq1 = fSeq.getAligned();
+            string seq2 = rSeq.getAligned();
+            
+            vector<int> scores1 = fQual.getQualityScores();
+            vector<int> scores2 = rQual.getQualityScores();
+            
+            for (int i = 0; i < length; i++) {
+                if (seq1[i] == seq2[i]) { //match, add base and choose highest score
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; }
+                }else if (((seq1[i] == '.') || (seq1[i] == '-')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //seq1 is a gap and seq2 is a base, choose seq2
+                    contig += seq2[i];
+                    contigScores.push_back(scores2[BBaseMap[i]]);
+                }else if (((seq2[i] == '.') || (seq2[i] == '-')) && ((seq1[i] != '-') && (seq1[i] != '.'))) { //seq2 is a gap and seq1 is a base, choose seq1
+                    contig += seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                }else if (((seq1[i] != '-') && (seq1[i] != '.')) && ((seq2[i] != '-') && (seq2[i] != '.'))) { //both bases choose one with better quality
+                    char c = seq1[i];
+                    contigScores.push_back(scores1[ABaseMap[i]]);
+                    if (scores1[ABaseMap[i]] < scores2[BBaseMap[i]]) { contigScores[i] = scores2[BBaseMap[i]]; c = seq2[i]; }
+                    contig += c;
+                    numMismatches++;
+                }else { //should never get here
+                    pDataArray->m->mothurOut("[ERROR]: case I didn't think of seq1 = " + toString(seq1[i]) + " and seq2 = " + toString(seq2[i]) + "\n");
+                }
+            }
+            
+            //output
+            outFasta << ">" << fSeq.getName() << endl << contig << endl;
+            outQual << ">" << fSeq.getName() << endl;
+            for (int i = 0; i < contigScores.size(); i++) { outQual << contigScores[i] << ' '; }
+            outQual << endl;
+            outMisMatch << fSeq.getName() << '\t' << contig.length() << '\t' << numMismatches << endl;
+            
+            num++;
+            
+                       //report progress
+                       if((num) % 1000 == 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+               }
+        
+               //report progress
+               if((num) % 1000 != 0){  pDataArray->m->mothurOut(toString(num)); pDataArray->m->mothurOutEndLine();             }
+        
+        inFFasta.close();
+        inFQual.close();
+        inRFasta.close();
+        inRQual.close();
+        outFasta.close();
+        outQual.close();
+        outMisMatch.close();
+        delete alignment;
+        
+        if (pDataArray->m->control_pressed) { pDataArray->m->mothurRemove(pDataArray->outputQual); pDataArray->m->mothurRemove(pDataArray->outputFasta);  pDataArray->m->mothurRemove(pDataArray->outputMisMatches);}
+        
+        return 0;
+               
+       }
+       catch(exception& e) {
+               pDataArray->m->errorOut(e, "AlignCommand", "MyContigsThreadFunction");
+               exit(1);
+       }
+} 
+#endif
 
 
 #endif