]> git.donarmstrong.com Git - mothur.git/blobdiff - aligncommand.cpp
added oligos class. added check orient parameter to trim.flows, sffinfo, fastq.info...
[mothur.git] / aligncommand.cpp
index a871244f4538881c8d285c2a1202597be319498d..bcc59675d3b545f68197a3f56911a769ebe249bb 100644 (file)
@@ -110,7 +110,7 @@ AlignCommand::AlignCommand(){
 //**********************************************************************************************************************
 AlignCommand::AlignCommand(string option)  {
        try {
-               abort = false; calledHelp = false;  
+               abort = false; calledHelp = false;
                ReferenceDB* rdb = ReferenceDB::getInstance();
                
                //allow user to run help
@@ -543,6 +543,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                //moved this into driver to avoid deep copies in windows paralellized version
                Alignment* alignment;
                int longestBase = templateDB->getLongestBase();
+        if (m->debug) { m->mothurOut("[DEBUG]: template longest base = "  + toString(templateDB->getLongestBase()) + " \n"); }
                if(align == "gotoh")                    {       alignment = new GotohOverlap(gapOpen, gapExtend, match, misMatch, longestBase);                 }
                else if(align == "needleman")   {       alignment = new NeedlemanOverlap(gapOpen, match, misMatch, longestBase);                                }
                else if(align == "blast")               {       alignment = new BlastAlignment(gapOpen, gapExtend, match, misMatch);            }
@@ -565,11 +566,12 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        int numBasesNeeded = origNumBases * threshold;
        
                        if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
-                               if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
-                                       alignment->resize(candidateSeq->getUnaligned().length()+1);
+                               if (candidateSeq->getUnaligned().length()+1 > alignment->getnRows()) {
+                    if (m->debug) { m->mothurOut("[DEBUG]: " + candidateSeq->getName() + " " + toString(candidateSeq->getUnaligned().length()) + " " + toString(alignment->getnRows()) + " \n"); }
+                                       alignment->resize(candidateSeq->getUnaligned().length()+2);
                                }
                                Sequence temp = templateDB->findClosestSequence(candidateSeq);
-                               Sequence* templateSeq = &temp;
+                               Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
                                
                                float searchScore = templateDB->getSearchScore();
                                                                
@@ -593,19 +595,26 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                                //get reverse compliment
                                                copy = new Sequence(candidateSeq->getName(), originalUnaligned);
                                                copy->reverseComplement();
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: flipping "  + candidateSeq->getName() + " \n"); }
                                                
                                                //rerun alignment
                                                Sequence temp2 = templateDB->findClosestSequence(copy);
-                                               Sequence* templateSeq2 = &temp2;
+                                               Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: closest template "  + temp2.getName() + " \n"); }
                                                
                                                searchScore = templateDB->getSearchScore();
                                                
                                                nast2 = new Nast(alignment, copy, templateSeq2);
+                        
+                        if (m->debug) { m->mothurOut("[DEBUG]: completed Nast2 "  + candidateSeq->getName() + " flipped numBases = " + toString(copy->getNumBases()) + " old numbases = " + toString(candidateSeq->getNumBases()) +" \n"); }
                        
                                                //check if any better
                                                if (copy->getNumBases() > candidateSeq->getNumBases()) {
                                                        candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
-                                                       templateSeq = templateSeq2; 
+                            delete templateSeq;
+                                                       templateSeq = templateSeq2;
                                                        delete nast;
                                                        nast = nast2;
                                                        needToDeleteCopy = true;
@@ -613,8 +622,10 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                                }else{  
                                                        wasBetter = "\treverse complement did NOT produce a better alignment so it was not used, please check sequence.";
                                                        delete nast2;
+                            delete templateSeq2;
                                                        delete copy;    
                                                }
+                        if (m->debug) { m->mothurOut("[DEBUG]: done.\n"); }
                                        }
                                        
                                        //create accnos file with names
@@ -630,6 +641,7 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                                
                                report.print();
                                delete nast;
+                delete templateSeq;
                                if (needToDeleteCopy) {   delete copy;   }
                                
                                count++;
@@ -644,11 +656,11 @@ int AlignCommand::driver(linePair* filePos, string alignFName, string reportFNam
                        #endif
                        
                        //report progress
-                       if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       if((count) % 100 == 0){ m->mothurOutJustToScreen(toString(count) + "\n");               }
                        
                }
                //report progress
-               if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               if((count) % 100 != 0){ m->mothurOutJustToScreen(toString(count) + "\n");               }
                
                delete alignment;
                alignmentFile.close();
@@ -734,7 +746,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                }
                                                                
                                Sequence temp = templateDB->findClosestSequence(candidateSeq);
-                               Sequence* templateSeq = &temp;
+                               Sequence* templateSeq = new Sequence(temp.getName(), temp.getAligned());
                                
                                float searchScore = templateDB->getSearchScore();
                                                                
@@ -759,7 +771,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                                
                                                //rerun alignment
                                                Sequence temp2 = templateDB->findClosestSequence(copy);
-                                               Sequence* templateSeq2 = &temp2;
+                                               Sequence* templateSeq2 = new Sequence(temp2.getName(), temp2.getAligned());
                                                
                                                searchScore = templateDB->getSearchScore();
                                                
@@ -768,7 +780,8 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                                //check if any better
                                                if (copy->getNumBases() > candidateSeq->getNumBases()) {
                                                        candidateSeq->setAligned(copy->getAligned());  //use reverse compliments alignment since its better
-                                                       templateSeq = templateSeq2; 
+                                                       delete templateSeq;
+                                                       templateSeq = templateSeq2;
                                                        delete nast;
                                                        nast = nast2;
                                                        needToDeleteCopy = true;
@@ -776,6 +789,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                                }else{  
                                                        wasBetter = "\treverse complement did NOT produce a better alignment, please check sequence.";
                                                        delete nast2;
+                            delete templateSeq2;
                                                        delete copy;    
                                                }
                                        }
@@ -821,6 +835,7 @@ int AlignCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& align
                                
                                delete buf3;
                                delete nast;
+                delete templateSeq;
                                if (needToDeleteCopy) {   delete copy;   }
                        }
                        delete candidateSeq;
@@ -845,34 +860,72 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
        try {
                int num = 0;
                processIDS.resize(0);
+        bool recalc = false;
+        
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
                int process = 1;
                
                //loop through and create all the processes you want
                while (process != processors) {
-                       int pid = fork();
+                       pid_t pid = fork();
                        
                        if (pid > 0) {
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename);
                                
                                //pass numSeqs to parent
                                ofstream out;
-                               string tempFile = alignFileName + toString(getpid()) + ".num.temp";
+                               string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp";
                                m->openOutputFile(tempFile, out);
                                out << num << endl;
                                out.close();
                                
                                exit(0);
                        }else { 
-                               m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
-                               for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
-                               exit(0);
+                               m->mothurOut("[ERROR]: unable to spawn the number of processes you requested, reducing number to " + toString(process) + "\n"); processors = process;
+                for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                recalc = true;
+                               break;
                        }
                }
                
+        if (recalc) {
+            for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
+            vector<unsigned long long> positions;
+                       positions = m->divideFile(filename, processors);
+                       for (int i = 0; i < (positions.size()-1); i++) {        lines.push_back(new linePair(positions[i], positions[(i+1)]));  }
+            
+            num = 0;
+            processIDS.resize(0);
+            process = 1;
+            
+            while (process != processors) {
+                pid_t pid = fork();
+                
+                if (pid > 0) {
+                    processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
+                    process++;
+                }else if (pid == 0){
+                    num = driver(lines[process], alignFileName + toString(m->mothurGetpid(process)) + ".temp", reportFileName + toString(m->mothurGetpid(process)) + ".temp", accnosFName + m->mothurGetpid(process) + ".temp", filename);
+                    
+                    //pass numSeqs to parent
+                    ofstream out;
+                    string tempFile = alignFileName + toString(m->mothurGetpid(process)) + ".num.temp";
+                    m->openOutputFile(tempFile, out);
+                    out << num << endl;
+                    out.close();
+                    
+                    exit(0);
+                }else {
+                    m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+                    for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+                    exit(0);
+                }
+            }
+        }
+        
                //do my part
                num = driver(lines[0], alignFileName, reportFileName, accnosFName, filename);
                
@@ -965,6 +1018,9 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                
                //Close all thread handles and free memory allocations.
                for(int i=0; i < pDataArray.size(); i++){
+            if (pDataArray[i]->count != pDataArray[i]->end) {
+                m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
+            }
                        num += pDataArray[i]->count;
                        CloseHandle(hThreadArray[i]);
                        delete pDataArray[i];