]> git.donarmstrong.com Git - mothur.git/commitdiff
changed how we break up the files on parallelized commands to avoid scanning file.
authorwestcott <westcott>
Thu, 19 Aug 2010 16:28:18 +0000 (16:28 +0000)
committerwestcott <westcott>
Thu, 19 Aug 2010 16:28:18 +0000 (16:28 +0000)
23 files changed:
aligncommand.cpp
aligncommand.h
chimeraccodecommand.cpp
chimeraccodecommand.h
chimeracheckcommand.cpp
chimeracheckcommand.h
chimerapintailcommand.cpp
chimerapintailcommand.h
chimeraslayercommand.cpp
chimeraslayercommand.h
classifyseqscommand.cpp
classifyseqscommand.h
filterseqscommand.cpp
filterseqscommand.h
globaldata.cpp
globaldata.hpp
mothur.h
readotucommand.cpp
readotucommand.h
screenseqscommand.cpp
screenseqscommand.h
seqsummarycommand.cpp
seqsummarycommand.h

index 4641ed7f65a65374df52836bb6c9e6b943fa8e61..3f70e2d3733a2d40da2012d8ba31bff61c768b9b 100644 (file)
@@ -292,7 +292,6 @@ int AlignCommand::execute(){
                                        int startIndex =  pid * numSeqsPerProcessor;
                                        if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
                                        
-                               
                                        //align your part
                                        driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
                                        
@@ -347,24 +346,16 @@ int AlignCommand::execute(){
                                }
                                
 #else
-                       
+               vector<unsigned long int> positions = divideFile(candidateFileNames[s], processors);
+                               
+               for (int i = 0; i < (positions.size()-1); i++) {
+                       lines.push_back(new linePair(positions[i], positions[(i+1)]));
+               }       
        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(candidateFileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numFastaSeqs);
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numFastaSeqs));
+                               numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                                
-                               driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
-                               
-                               if (m->control_pressed) { 
-                                       remove(accnosFileName.c_str()); 
-                                       remove(alignFileName.c_str()); 
-                                       remove(reportFileName.c_str()); 
-                                       return 0; 
-                               }
+                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
                                
                                //delete accnos file if its blank else report to user
                                if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
@@ -375,36 +366,10 @@ int AlignCommand::execute(){
                                        }else{  m->mothurOut(" If the reverse compliment proved to be better it was reported.");  }
                                        m->mothurOutEndLine();
                                }
-                       }
-                       else{
-                               vector<unsigned long int> positions;
+                       }else{
                                processIDS.resize(0);
                                
-                               ifstream inFASTA;
-                               openInputFile(candidateFileNames[s], inFASTA);
-                               
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numFastaSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numFastaSeqs / processors;
-                               
-                               for (int i = 0; i < processors; i++) {
-                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                                       }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               
-                               createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); 
+                               numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]); 
                                
                                rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
                                rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
@@ -441,29 +406,12 @@ int AlignCommand::execute(){
                                        m->mothurOutEndLine();
                                }else{ hasAccnos = false;  }
                                
-                               if (m->control_pressed) { 
-                                       remove(accnosFileName.c_str()); 
-                                       remove(alignFileName.c_str()); 
-                                       remove(reportFileName.c_str()); 
-                                       return 0; 
-                               }
+                               if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
                        }
        #else
-                       ifstream inFASTA;
-                       openInputFile(candidateFileNames[s], inFASTA);
-                       getNumSeqs(inFASTA, numFastaSeqs);
-                       inFASTA.close();
-                       
-                       lines.push_back(new linePair(0, numFastaSeqs));
-                       
-                       driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+                       numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
                        
-                       if (m->control_pressed) { 
-                               remove(accnosFileName.c_str()); 
-                               remove(alignFileName.c_str()); 
-                               remove(reportFileName.c_str()); 
-                               return 0; 
-                       }
+                       if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
                        
                        //delete accnos file if its blank else report to user
                        if (isBlank(accnosFileName)) {  remove(accnosFileName.c_str());  hasAccnos = false; }
@@ -515,7 +463,7 @@ int AlignCommand::execute(){
 
 //**********************************************************************************************************************
 
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){
+int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
        try {
                ofstream alignmentFile;
                openOutputFile(alignFName, alignmentFile);
@@ -528,9 +476,12 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName,
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
        
-               for(int i=0;i<line->numSeqs;i++){
+               while (!done) {
                        
                        if (m->control_pressed) {  return 0; }
                        
@@ -607,20 +558,26 @@ int AlignCommand::driver(linePair* line, string alignFName, string reportFName,
                                report.print();
                                delete nast;
                                if (needToDeleteCopy) {   delete copy;   }
+                               
+                               count++;
                        }
                        delete candidateSeq;
                        
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+                       if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+                       
                }
                //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine();           }
+               if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                
                alignmentFile.close();
                inFASTA.close();
                accnosFile.close();
                
-               return 1;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "AlignCommand", "driver");
@@ -796,7 +753,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               int exitCommand = 1;
+               int num = 0;
                //              processIDS.resize(0);
                
                //loop through and create all the processes you want
@@ -807,7 +764,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -818,7 +783,15 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                        wait(&temp);
                }
                
-               return exitCommand;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
@@ -826,7 +799,6 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s
                exit(1);
        }
 }
-
 /**************************************************************************************************/
 
 void AlignCommand::appendAlignFiles(string temp, string filename) {
index fb47874f8e1ff619a015b66b1f60952aafd61eb0..b58c3f29bcc653e48c2d30fc9ae4a57344c4330e 100644 (file)
@@ -27,8 +27,8 @@ public:
 private:
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
@@ -43,7 +43,7 @@ private:
        void appendReportFiles(string, string);
        
        #ifdef USE_MPI
-       int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+       int driverMPI(MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
        #endif
        
        string candidateFileName, templateFileName, distanceFileName, search, align, outputDir;
index 4c8848db7ace06c853e809d814f32042bab92cce..8ea91e53abe27cb75641bfc54c7ed293d2b17a65 100644 (file)
@@ -300,58 +300,24 @@ int ChimeraCcodeCommand::execute(){
 
                        outHeader.close();
                        
+                       vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+                       
                        //break up file
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       getNumSeqs(inFASTA, numSeqs);
-                                       inFASTA.close();
-                                       
-                                       lines.push_back(new linePair(0, numSeqs));
+                                                                               
+                                       numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                        
-                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-                                       
-                                       if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(tempHeader.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                                       }
+                                       if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
-                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       
-                                       string input;
-                                       while(!inFASTA.eof()){
-                                               input = getline(inFASTA);
-                                               if (input.length() != 0) {
-                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                               }
-                                       }
-                                       inFASTA.close();
-                                       
-                                       numSeqs = positions.size();
-                                       
-                                       int numSeqsPerProcessor = numSeqs / processors;
-                                       
-                                       for (int i = 0; i < processors; i++) {
-                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                               if(i == processors - 1){
-                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
-                                               }
-                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                                       }
-                                       
-                                       
-                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
+                                       numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
                                        rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
                                        rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
@@ -380,23 +346,9 @@ int ChimeraCcodeCommand::execute(){
                                }
 
                        #else
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numSeqs);
-                               inFASTA.close();
-                               lines.push_back(new linePair(0, numSeqs));
-                               
-                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+                               numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
-                               if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(tempHeader.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                               }
+                               if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                
                        #endif
        
@@ -432,7 +384,7 @@ int ChimeraCcodeCommand::execute(){
 }
 //**********************************************************************************************************************
 
-int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
        try {
                ofstream out;
                openOutputFile(outputFName, out);
@@ -443,9 +395,12 @@ int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filen
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
-               
-               for(int i=0;i<line->numSeqs;i++){
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                
                        if (m->control_pressed) {       return 1;       }
                
@@ -464,20 +419,24 @@ int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filen
                                        //print results
                                        chimera->print(out, out2);
                                }
+                               count++;
                        }
                        delete candidateSeq;
                        
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
                //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                
                out.close();
                out2.close();
                inFASTA.close();
                                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraCcodeCommand", "driver");
@@ -549,7 +508,7 @@ int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename,
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               //              processIDS.resize(0);
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -559,7 +518,15 @@ int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename,
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -570,7 +537,15 @@ int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename,
                        wait(&temp);
                }
                
-               return 0;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
index 5989eb25e0c5c1488e8bdf69e03157bbe1e7e6fd..d980288ac3585916321edec15114d0c47303e120 100644 (file)
@@ -26,11 +26,10 @@ public:
        
                
 private:
-
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
index 6cea5d5f9079701fdf58f1061cfe9836f1b93d3c..0ec5f4bcdba426acd22dd17f8e8e3b55f263c31a 100644 (file)
@@ -299,55 +299,23 @@ int ChimeraCheckCommand::execute(){
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
                        
+                       vector<unsigned long int> positions = divideFile(fastaFileNames[i], processors);
+                               
+                       for (int s = 0; s < (positions.size()-1); s++) {
+                               lines.push_back(new linePair(positions[s], positions[(s+1)]));
+                       }       
+                       
                        //break up file
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[i], inFASTA);
-                                       getNumSeqs(inFASTA, numSeqs);
-                                       inFASTA.close();
-                                       
-                                       lines.push_back(new linePair(0, numSeqs));
+                                       numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
                                        
-                                       driver(lines[0], outputFileName, fastaFileNames[i]);
-                                       
-                                       if (m->control_pressed) { 
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                                       }
+                                       if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear(); delete chimera; return 0; }
                                                                        
                                }else{
-                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[i], inFASTA);
-                                       
-                                       string input;
-                                       while(!inFASTA.eof()){
-                                               input = getline(inFASTA);
-                                               if (input.length() != 0) {
-                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                               }
-                                       }
-                                       inFASTA.close();
-                                       
-                                       numSeqs = positions.size();
-                                       
-                                       int numSeqsPerProcessor = numSeqs / processors;
-                                       
-                                       for (int j = 0; j < processors; j++) {
-                                               unsigned long int startPos = positions[ j * numSeqsPerProcessor ];
-                                               if(j == processors - 1){
-                                                       numSeqsPerProcessor = numSeqs - j * numSeqsPerProcessor;
-                                               }
-                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                                       }
-                                       
-                                       
-                                       createProcesses(outputFileName, fastaFileNames[i]); 
+                                       numSeqs = createProcesses(outputFileName, fastaFileNames[i]); 
                                
                                        rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
                                                
@@ -366,20 +334,9 @@ int ChimeraCheckCommand::execute(){
                                }
 
                        #else
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[i], inFASTA);
-                               getNumSeqs(inFASTA, numSeqs);
-                               inFASTA.close();
-                               lines.push_back(new linePair(0, numSeqs));
-                               
-                               driver(lines[0], outputFileName, fastaFileNames[i]);
+                               numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
                                
-                               if (m->control_pressed) { 
-                                               for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear();
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               delete chimera;
-                                               return 0;
-                               }
+                               if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) {  delete lines[j];  }  lines.clear(); for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } delete chimera; return 0; }
                        #endif
                #endif          
                        delete chimera;
@@ -405,7 +362,7 @@ int ChimeraCheckCommand::execute(){
 }
 //**********************************************************************************************************************
 
-int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filename){
+int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string filename){
        try {
                ofstream out;
                openOutputFile(outputFName, out);
@@ -415,10 +372,13 @@ int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filen
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
-               
-               for(int i=0;i<line->numSeqs;i++){
-               
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
+
                        if (m->control_pressed) {       return 1;       }
                
                        Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
@@ -434,16 +394,19 @@ int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filen
                        }
                        delete candidateSeq;
                        
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
                //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                
                out.close();
                inFASTA.close();
                                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraCheckCommand", "driver");
@@ -506,7 +469,7 @@ int ChimeraCheckCommand::createProcesses(string outputFileName, string filename)
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               //              processIDS.resize(0);
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -516,7 +479,15 @@ int ChimeraCheckCommand::createProcesses(string outputFileName, string filename)
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -527,7 +498,15 @@ int ChimeraCheckCommand::createProcesses(string outputFileName, string filename)
                        wait(&temp);
                }
                
-               return 0;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
index 84c75ac8b8ff49e85cec777ffb8d2beeec544990..71e1f0cdad86dc4610a65711c57c355eb9ee2aaf 100644 (file)
@@ -30,9 +30,10 @@ private:
 
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
        
index fd8724dc2f2908d3eebebb995e5ef22e697cfd5e..d9368cce2afee08e8d22c638a0df30b2ca6e2e4a 100644 (file)
@@ -337,57 +337,24 @@ int ChimeraPintailCommand::execute(){
                                MPI_File_close(&outMPIAccnos);
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                #else
-               
+                       vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+                       
                        //break up file
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       getNumSeqs(inFASTA, numSeqs);
-                                       inFASTA.close();
-                                       
-                                       lines.push_back(new linePair(0, numSeqs));
-                                       
-                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+               
+                                       numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                        
-                                       if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                                       }
+                                       if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
-                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       
-                                       string input;
-                                       while(!inFASTA.eof()){
-                                               input = getline(inFASTA);
-                                               if (input.length() != 0) {
-                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                               }
-                                       }
-                                       inFASTA.close();
-                                       
-                                       numSeqs = positions.size();
-                                       
-                                       int numSeqsPerProcessor = numSeqs / processors;
-                                       
-                                       for (int i = 0; i < processors; i++) {
-                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                               if(i == processors - 1){
-                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
-                                               }
-                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                                       }
-                                       
-                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
+                                       numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
                                        rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
                                        rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
@@ -415,22 +382,9 @@ int ChimeraPintailCommand::execute(){
                                }
 
                        #else
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numSeqs);
-                               inFASTA.close();
-                               lines.push_back(new linePair(0, numSeqs));
-                               
-                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+                               numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
-                               if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                               }
+                               if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                        #endif
                        
                #endif  
@@ -460,7 +414,7 @@ int ChimeraPintailCommand::execute(){
 }
 //**********************************************************************************************************************
 
-int ChimeraPintailCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
        try {
                ofstream out;
                openOutputFile(outputFName, out);
@@ -471,10 +425,13 @@ int ChimeraPintailCommand::driver(linePair* line, string outputFName, string fil
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
-               
-               for(int i=0;i<line->numSeqs;i++){
-               
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
+                               
                        if (m->control_pressed) {       return 1;       }
                
                        Sequence* candidateSeq = new Sequence(inFASTA);  gobble(inFASTA);
@@ -492,20 +449,24 @@ int ChimeraPintailCommand::driver(linePair* line, string outputFName, string fil
                                        //print results
                                        chimera->print(out, out2);
                                }
+                               count++;
                        }
                        delete candidateSeq;
                        
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
                //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                
                out.close();
                out2.close();
                inFASTA.close();
                                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraPintailCommand", "driver");
@@ -576,7 +537,7 @@ int ChimeraPintailCommand::createProcesses(string outputFileName, string filenam
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               //              processIDS.resize(0);
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -586,7 +547,15 @@ int ChimeraPintailCommand::createProcesses(string outputFileName, string filenam
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -597,7 +566,15 @@ int ChimeraPintailCommand::createProcesses(string outputFileName, string filenam
                        wait(&temp);
                }
                
-               return 0;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
index 93707458d3b7b7b09cd1b4eb3aa02f8f03bafa16..9082204bf80af5ccb83cba072078ef8b276f6e3a 100644 (file)
@@ -30,9 +30,10 @@ private:
 
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
        
index e675c00ab98b5fb8b369d5a129ae03a412d6eca8..1de7021098d834dc165b42bcc3da82480c6868dd 100644 (file)
@@ -325,57 +325,23 @@ int ChimeraSlayerCommand::execute(){
                        chimera->printHeader(outHeader);
                        outHeader.close();
                        
+                       vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+
                        //break up file
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       getNumSeqs(inFASTA, numSeqs);
-                                       inFASTA.close();
+                                       numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                        
-                                       lines.push_back(new linePair(0, numSeqs));
-                                       
-                                       driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-                                       
-                                       if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(tempHeader.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                                       }
+                                       if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                        
                                }else{
-                                       vector<unsigned long int> positions;
                                        processIDS.resize(0);
                                        
-                                       ifstream inFASTA;
-                                       openInputFile(fastaFileNames[s], inFASTA);
-                                       
-                                       string input;
-                                       while(!inFASTA.eof()){
-                                               input = getline(inFASTA);
-                                               if (input.length() != 0) {
-                                                       if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                               }
-                                       }
-                                       inFASTA.close();
-                                       
-                                       numSeqs = positions.size();
-                                       
-                                       int numSeqsPerProcessor = numSeqs / processors;
-                                       
-                                       for (int i = 0; i < processors; i++) {
-                                               unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                               if(i == processors - 1){
-                                                       numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
-                                               }
-                                               lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                                       }
-                                       
-                                       createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
+                                       numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
                                
                                        rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
                                        rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
@@ -392,35 +358,13 @@ int ChimeraSlayerCommand::execute(){
                                                remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
                                        }
                                        
-                                       if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                                       }
-
+                                       if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                }
 
                        #else
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numSeqs);
-                               inFASTA.close();
-                               lines.push_back(new linePair(0, numSeqs));
+                               numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
                                
-                               driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-                               
-                               if (m->control_pressed) { 
-                                               remove(outputFileName.c_str()); 
-                                               remove(tempHeader.c_str()); 
-                                               remove(accnosFileName.c_str());
-                                               for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
-                                               for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
-                                               delete chimera;
-                                               return 0;
-                               }
+                               if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {    remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
                                
                        #endif
                        
@@ -456,7 +400,7 @@ int ChimeraSlayerCommand::execute(){
 }
 //**********************************************************************************************************************
 
-int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
        try {
                ofstream out;
                openOutputFile(outputFName, out);
@@ -467,9 +411,12 @@ int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string file
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
-               
-               for(int i=0;i<line->numSeqs;i++){
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                
                        if (m->control_pressed) {       return 1;       }
                
@@ -488,20 +435,24 @@ int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string file
                                        //print results
                                        chimera->print(out, out2);
                                }
+                       count++;
                        }
                        delete candidateSeq;
                        
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine();           }
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
                //report progress
-               if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine();         }
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                
                out.close();
                out2.close();
                inFASTA.close();
                                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ChimeraSlayerCommand", "driver");
@@ -573,7 +524,7 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               //              processIDS.resize(0);
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -583,7 +534,15 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -594,7 +553,15 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename
                        wait(&temp);
                }
                
-               return 0;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
index 95541b2b6aac8a636242dba385c1114267416bb4..c5ef88885d9b4c1e7275c998e8a2b35bbbbba99b 100644 (file)
@@ -29,9 +29,10 @@ private:
 
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
        
index 128fd01fd9f8b042ba736b97fbf89cdf86626fc8..4c1a0251013afcd77a7137d09be0db5b1c60ffe3 100644 (file)
@@ -460,45 +460,21 @@ int ClassifySeqsCommand::execute(){
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
+               
+                       vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+                       
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numFastaSeqs);
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numFastaSeqs));
-                               
-                               driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+                               numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
                        }
                        else{
-                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
-                               ifstream inFASTA;
-                               openInputFile(fastaFileNames[s], inFASTA);
-                               
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numFastaSeqs = positions.size();
-                               
-                               int numSeqsPerProcessor = numFastaSeqs / processors;
-       
-                               for (int i = 0; i < processors; i++) {
-                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                                       }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-                               }
-                               createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
+                               numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]); 
                                
                                rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str());
                                rename((tempTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), tempTaxonomyFile.c_str());
@@ -512,14 +488,7 @@ int ClassifySeqsCommand::execute(){
                                
                        }
        #else
-                       ifstream inFASTA;
-                       openInputFile(fastaFileNames[s], inFASTA);
-                       getNumSeqs(inFASTA, numFastaSeqs);
-                       inFASTA.close();
-                       
-                       lines.push_back(new linePair(0, numFastaSeqs));
-                       
-                       driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+                       numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
        #endif  
 #endif
 
@@ -681,11 +650,11 @@ string ClassifySeqsCommand::addUnclassifieds(string tax, int maxlevel) {
 
 /**************************************************************************************************/
 
-void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
+int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               //              processIDS.resize(0);
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -695,7 +664,15 @@ void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+                               num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -705,6 +682,16 @@ void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile
                        int temp = processIDS[i];
                        wait(&temp);
                }
+               
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
@@ -738,7 +725,7 @@ void ClassifySeqsCommand::appendTaxFiles(string temp, string filename) {
 
 //**********************************************************************************************************************
 
-int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFName, string filename){
+int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
        try {
                ofstream outTax;
                openOutputFile(taxFName, outTax);
@@ -748,12 +735,15 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
        
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
-
-               inFASTA.seekg(line->start);
                
                string taxonomy;
 
-               for(int i=0;i<line->numSeqs;i++){
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                        if (m->control_pressed) { return 0; }
                
                        Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
@@ -773,19 +763,24 @@ int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFNa
                                        
                                        outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
                                }
-                       }                               
+                               count++;
+                       }
                        delete candidateSeq;
                        
-                       if((i+1) % 100 == 0){
-                               m->mothurOut("Classifying sequence " + toString(i+1)); m->mothurOutEndLine();
-                       }
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
+                       //report progress
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
-               
+               //report progress
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
+                               
                inFASTA.close();
                outTax.close();
                outTaxSimple.close();
                
-               return 1;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ClassifySeqsCommand", "driver");
index 03e6826d1276b4efbdf12b5fce0aeb4cdea8fb08..84f5f9f04d6752728c5f3bf72d5688cafd73b723 100644 (file)
@@ -36,9 +36,10 @@ public:
 private:
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
        vector<string> fastaFileNames;
@@ -56,7 +57,7 @@ private:
        
        int driver(linePair*, string, string, string);
        void appendTaxFiles(string, string);
-       void createProcesses(string, string, string); 
+       int createProcesses(string, string, string); 
        string addUnclassifieds(string, int);
        
        int MPIReadNamesFile(string);
index e6e616a3a5d3f4b7a81ef1ab91a974a9c993074c..1ebac0eba5a7e7fc54d85b24dc705c0f8a8314b5 100644 (file)
@@ -338,22 +338,18 @@ int FilterSeqsCommand::filterSequences() {
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
+                       vector<unsigned long int> positions = divideFile(fastafileNames[s], processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       int numFastaSeqs;
-                                       openInputFile(fastafileNames[s], inFASTA);
-                                       getNumSeqs(inFASTA, numFastaSeqs);
-                                       inFASTA.close();
-                                       
-                                       lines.push_back(new linePair(0, numFastaSeqs));
-                                       
+                                       int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
                                        numSeqs += numFastaSeqs;
-                                       
-                                       driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
                                }else{
-                                       setLines(fastafileNames[s]);
-                                       createProcessesRunFilter(filter, fastafileNames[s]); 
+                                       int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s]); 
+                                       numSeqs += numFastaSeqs;
                                
                                        rename((fastafileNames[s] + toString(processIDS[0]) + ".temp").c_str(), filteredFasta.c_str());
                                
@@ -366,17 +362,8 @@ int FilterSeqsCommand::filterSequences() {
                                
                                if (m->control_pressed) {  return 1; }
                #else
-                               ifstream inFASTA;
-                               int numFastaSeqs;
-                               openInputFile(fastafileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numFastaSeqs);
-                               inFASTA.close();
-                                       
-                               lines.push_back(new linePair(0, numFastaSeqs));
-                               
+                               numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
                                numSeqs += numFastaSeqs;
-                               
-                               driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
 
                                if (m->control_pressed) {  return 1; }
                #endif
@@ -466,7 +453,7 @@ int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_Fil
 }
 #endif
 /**************************************************************************************/
-int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* line) {        
+int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* filePos) {     
        try {
                ofstream out;
                openOutputFile(outputFilename, out);
@@ -474,13 +461,16 @@ int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string i
                ifstream in;
                openInputFile(inputFilename, in);
                                
-               in.seekg(line->start);
-               
-               for(int i=0;i<line->num;i++){
+               in.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                                
                                if (m->control_pressed) { in.close(); out.close(); return 0; }
                                
-                               Sequence seq(in);
+                               Sequence seq(in); gobble(in);
                                if (seq.getName() != "") {
                                        string align = seq.getAligned();
                                        string filterSeq = "";
@@ -492,20 +482,23 @@ int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string i
                                        }
                                        
                                        out << '>' << seq.getName() << endl << filterSeq << endl;
-                               }
-                               gobble(in);
-                               
+                               count++;
+                       }
+                       
+                       unsigned long int pos = in.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+                       if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                }
-               
                //report progress
-               if((line->num) % 100 != 0){     m->mothurOut(toString(line->num)); m->mothurOutEndLine();               }
+               if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               
                
                out.close();
                in.close();
                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
@@ -518,7 +511,7 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               int exitCommand = 1;
+               int num = 0;
                processIDS.clear();
                
                //loop through and create all the processes you want
@@ -530,7 +523,15 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
                                process++;
                        }else if (pid == 0){
                                string filteredFasta = filename + toString(getpid()) + ".temp";
-                               driverRunFilter(F, filteredFasta, filename, lines[process]);
+                               num = driverRunFilter(F, filteredFasta, filename, lines[process]);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -539,9 +540,18 @@ int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
                for (int i=0;i<processors;i++) { 
                        int temp = processIDS[i];
                        wait(&temp);
+               }       
+                                       
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
                }
+
                
-               return exitCommand;
+               return num;
 #endif         
        }
        catch(exception& e) {
@@ -636,37 +646,24 @@ string FilterSeqsCommand::createFilter() {
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                
 #else
+               vector<unsigned long int> positions = divideFile(fastafileNames[s], processors);
+                               
+               for (int i = 0; i < (positions.size()-1); i++) {
+                       lines.push_back(new linePair(positions[i], positions[(i+1)]));
+               }       
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       int numFastaSeqs;
-                                       openInputFile(fastafileNames[s], inFASTA);
-                                       getNumSeqs(inFASTA, numFastaSeqs);
-                                       inFASTA.close();
-                                       
+                                       int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
                                        numSeqs += numFastaSeqs;
-                                       
-                                       lines.push_back(new linePair(0, numFastaSeqs));
-                                       
-                                       driverCreateFilter(F, fastafileNames[s], lines[0]);
                                }else{
-                                       setLines(fastafileNames[s]);                                    
-                                       createProcessesCreateFilter(F, fastafileNames[s]); 
+                                       int numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]); 
+                                       numSeqs += numFastaSeqs;
                                }
                                
                                if (m->control_pressed) {  return filterString; }
                #else
-                               ifstream inFASTA;
-                               int numFastaSeqs;
-                               openInputFile(fastafileNames[s], inFASTA);
-                               getNumSeqs(inFASTA, numFastaSeqs);
-                               inFASTA.close();
-                               
+                               numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
                                numSeqs += numFastaSeqs;
-                               
-                               lines.push_back(new linePair(0, numFastaSeqs));
-                               
-                               driverCreateFilter(F, fastafileNames[s], lines[0]);
                                if (m->control_pressed) {  return filterString; }
                #endif
 #endif
@@ -765,37 +762,42 @@ string FilterSeqsCommand::createFilter() {
        }
 }
 /**************************************************************************************/
-int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) {       
+int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* filePos) {    
        try {
                
                ifstream in;
                openInputFile(filename, in);
                                
-               in.seekg(line->start);
-               
-               for(int i=0;i<line->num;i++){
+               in.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                                
                        if (m->control_pressed) { in.close(); return 1; }
                                        
-                       Sequence seq(in);
+                       Sequence seq(in); gobble(in);
                        if (seq.getName() != "") {
                                        if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
                                        
                                        if(trump != '*'){       F.doTrump(seq); }
                                        if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
                                        cout.flush();
+                                       count++;
                        }
                        
+                       unsigned long int pos = in.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
                        //report progress
-                       if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
+                       if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                }
-               
                //report progress
-               if((line->num) % 100 != 0){     m->mothurOut(toString(line->num)); m->mothurOutEndLine();               }
-               
+               if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                in.close();
                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
@@ -855,7 +857,7 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               int exitCommand = 1;
+               int num = 0;
                processIDS.clear();
                
                //loop through and create all the processes you want
@@ -866,13 +868,14 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driverCreateFilter(F, filename, lines[process]);
+                               num = driverCreateFilter(F, filename, lines[process]);
                                
                                //write out filter counts to file
                                filename += toString(getpid()) + "filterValues.temp";
                                ofstream out;
                                openOutputFile(filename, out);
                                
+                               out << num << endl;
                                for (int k = 0; k < alignmentLength; k++) {             out << F.a[k] << '\t'; }  out << endl;
                                for (int k = 0; k < alignmentLength; k++) {             out << F.t[k] << '\t'; }  out << endl;
                                for (int k = 0; k < alignmentLength; k++) {             out << F.g[k] << '\t'; }  out << endl;
@@ -897,7 +900,8 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                        ifstream in;
                        openInputFile(tempFilename, in);
                        
-                       int temp;
+                       int temp, tempNum;
+                       in >> tempNum; gobble(in); num += tempNum;
                        for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.a[k] += temp; }           gobble(in);
                        for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.t[k] += temp; }           gobble(in);
                        for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.g[k] += temp; }           gobble(in);
@@ -908,7 +912,7 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                        remove(tempFilename.c_str());
                }
                
-               return exitCommand;
+               return num;
 #endif         
        }
        catch(exception& e) {
@@ -916,63 +920,4 @@ int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename)
                exit(1);
        }
 }
-/**************************************************************************************************/
-
-int FilterSeqsCommand::setLines(string filename) {
-       try {
-               
-               vector<unsigned long int> positions;
-               bufferSizes.clear();
-               
-               ifstream inFASTA;
-               openInputFile(filename, inFASTA);
-                       
-               string input;
-               while(!inFASTA.eof()){  
-                       input = getline(inFASTA);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
-                       }
-               }
-               inFASTA.close();
-               
-               int numFastaSeqs = positions.size();
-       
-               FILE * pFile;
-               unsigned long int size;
-               
-               //get num bytes in file
-               pFile = fopen (filename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
-       
-               numSeqs += numFastaSeqs;
-               
-               int numSeqsPerProcessor = numFastaSeqs / processors;
-               
-               for (int i = 0; i < processors; i++) {
-
-                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                       if(i == processors - 1){
-                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                               bufferSizes.push_back(size - startPos);
-                       }else{  
-                               unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
-                               bufferSizes.push_back(myEnd-startPos);
-                       }
-                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-               }
-               
-               return 0;
-       }
-       catch(exception& e) {
-               m->errorOut(e, "FilterSeqsCommand", "setLines");
-               exit(1);
-       }
-}
 /**************************************************************************************/
index e068405816e3d8c0bc1e21b2fb8316f779009526..cb02732d1ede7c7a70ec5a518426bca18a0019bf 100644 (file)
@@ -25,9 +25,10 @@ public:
 private:\r
        struct linePair {\r
                unsigned long int start;\r
-               int num;\r
-               linePair(unsigned long int i, long int j) : start(i), num(j) {}\r
+               unsigned long int end;\r
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}\r
        };\r
+\r
        vector<linePair*> lines;\r
        vector<int> processIDS;\r
 \r
@@ -52,8 +53,6 @@ private:
        int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long int>&);\r
        int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long int>&); \r
        #endif\r
-       int setLines(string);\r
-       \r
        \r
 };\r
 \r
index affa4636ab4716746665b6b7c73ccccf1f7b9354..74daaa2cc87cea237619fec03afdf28a8095f65f 100644 (file)
@@ -29,6 +29,7 @@ string GlobalData::getOrderFile()             {       return orderfile;               }
 string GlobalData::getOrderGroupFile() {       return ordergroup;              }
 string GlobalData::getTreeFile()               {       return treefile;                }
 string GlobalData::getSharedFile()             {       return sharedfile;              }       
+string GlobalData::getRelAbundFile()   {       return relAbundfile;    }       
 string GlobalData::getFormat()                 {       return format;                  }
 
 void GlobalData::setListFile(string file)              {       listfile = file;        inputFileName = file;                                   }
@@ -39,6 +40,7 @@ void GlobalData::setPhylipFile(string file)           {       phylipfile = file;    inputFileNa
 void GlobalData::setColumnFile(string file)            {       columnfile = file;    inputFileName = file;                                     }
 void GlobalData::setGroupFile(string file)             {       groupfile = file;                                                                                       }
 void GlobalData::setSharedFile(string file)            {       sharedfile = file;      inputFileName = file;                                   }
+void GlobalData::setRelAbundFile(string file)  {       relAbundfile = file;    inputFileName = file;                           }
 void GlobalData::setNameFile(string file)              {       namefile = file;                }
 void GlobalData::setOrderFile(string file)             {       orderfile = file;               }
 void GlobalData::setOrderGroupFile(string file)        {       ordergroup = file;              }
@@ -83,6 +85,7 @@ void GlobalData::clear() {
 //     fastafile               =   ""; //do we need this?
        treefile                =       "";
        sharedfile              =       "";
+       relAbundfile    =       "";
        format = "";
 }
 
index 301527a3f9b1e07d7f82ecbd4a04778ec0f39c37..bfb8a0688e1fe6e811ef83dc08a803c2753fae46 100644 (file)
@@ -61,6 +61,7 @@ public:
        string getOrderGroupFile();
        string getTreeFile();
        string getSharedFile();
+       string getRelAbundFile();
        string getFormat();     //do we need this?
 
 
@@ -73,6 +74,7 @@ public:
        void setRabundFile(string);
        void setSabundFile(string);
        void setSharedFile(string);
+       void setRelAbundFile(string);
        void setOrderFile(string file);
        void setOrderGroupFile(string file);
        void setFormat(string); //do we need this?
@@ -87,7 +89,7 @@ public:
        
 private:
        MothurOut* m;
-       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile, ordergroup;
+       string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile, ordergroup, relAbundfile;
 
        static GlobalData* _uniqueInstance;
        GlobalData( const GlobalData& ); // Disable copy constructor
index 765c90ab27026ee256f15fc7efc582a06eec5303..2622e2515e87ed9914f4491b716dc03c501d5411 100644 (file)
--- a/mothur.h
+++ b/mothur.h
@@ -1134,6 +1134,72 @@ inline vector<unsigned long int> setFilePosEachLine(string filename, int& num) {
                        return positions;
 }
 /**************************************************************************************************/
+
+inline vector<unsigned long int> divideFile(string filename, int& proc) {
+       try{
+       
+               vector<unsigned long int> filePos;
+               filePos.push_back(0);
+               
+               FILE * pFile;
+               unsigned long int size;
+               
+               //get num bytes in file
+               pFile = fopen (filename.c_str(),"rb");
+               if (pFile==NULL) perror ("Error opening file");
+               else{
+                       fseek (pFile, 0, SEEK_END);
+                       size=ftell (pFile);
+                       fclose (pFile);
+               }
+       
+               //estimate file breaks
+               unsigned long int chunkSize = 0;
+               chunkSize = size / proc;
+               
+               //file to small to divide by processors
+               if (chunkSize == 0)  {  proc = 1;       filePos.push_back(size); return filePos;        }
+       
+               //for each process seekg to closest file break and search for next '>' char. make that the filebreak
+               for (int i = 0; i < proc; i++) {
+                       unsigned long int spot = (i+1) * chunkSize;
+                       
+                       ifstream in;
+                       openInputFile(filename, in);
+                       in.seekg(spot);
+                       
+                       //look for next '>'
+                       unsigned long int newSpot = spot;
+                       while (!in.eof()) {
+                          char c = in.get();
+                          if (c == '>') {   in.putback(c); newSpot = in.tellg(); break;  }
+                       }
+                       
+                       //there was not another sequence before the end of the file
+                       if (newSpot == spot) {  break;  }
+                       else {   filePos.push_back(newSpot);  }
+                       
+                       in.close();
+               }
+               
+               //save end pos
+               filePos.push_back(size);
+               
+               //sanity check filePos
+               for (int i = 0; i < (filePos.size()-1); i++) {
+                       if (filePos[(i+1)] <= filePos[i]) {  filePos.erase(filePos.begin()+(i+1)); i--; }
+               }
+
+               proc = (filePos.size() - 1);
+               
+               return filePos;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 inline bool checkReleaseVersion(ifstream& file, string version) {
        try {
                
index c4bf523fb30be6f1bd84dc81eac85e3e92c9e6f8..5b9cde8c84b10e0d4f6dd938dbf68191000930f4 100644 (file)
@@ -21,7 +21,7 @@ ReadOtuCommand::ReadOtuCommand(string option)  {
                
                else {
                        //valid paramters for this command
-                       string Array[] =  {"list","order","shared", "label","group","sabund", "rabund","groups","ordergroup","outputdir","inputdir"};
+                       string Array[] =  {"list","order","shared","relabund","label","group","sabund", "rabund","groups","ordergroup","outputdir","inputdir"};
                        vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                        
                        OptionParser parser(option);
@@ -97,7 +97,14 @@ ReadOtuCommand::ReadOtuCommand(string option)  {
                                        //if the user has not given a path then, add inputdir. else leave path alone.
                                        if (path == "") {       parameters["ordergroup"] = inputDir + it->second;               }
                                }
-
+                               
+                               it = parameters.find("relabund");
+                               //user has given a template file
+                               if(it != parameters.end()){ 
+                                       path = hasPath(it->second);
+                                       //if the user has not given a path then, add inputdir. else leave path alone.
+                                       if (path == "") {       parameters["relabund"] = inputDir + it->second;         }
+                               }
                        }
 
                        
@@ -130,6 +137,12 @@ ReadOtuCommand::ReadOtuCommand(string option)  {
                        else if (sharedfile == "not found") { sharedfile = ""; }
                        else {  globaldata->setSharedFile(sharedfile); globaldata->setFormat("sharedfile");     }
                        
+                       relAbundfile = validParameter.validFile(parameters, "relabund", true);
+                       if (relAbundfile == "not open") { abort = true; }       
+                       else if (relAbundfile == "not found") { relAbundfile = ""; }
+                       else {  globaldata->setRelAbundFile(relAbundfile); globaldata->setFormat("relabund");   }
+
+                       
                        groupfile = validParameter.validFile(parameters, "group", true);
                        if (groupfile == "not open") { abort = true; }  
                        else if (groupfile == "not found") { groupfile = ""; }
@@ -154,8 +167,8 @@ ReadOtuCommand::ReadOtuCommand(string option)  {
                        if ((listfile != "") && (groupfile != "")) { globaldata->setFormat("shared"); }
                        
                        //you have not given a file
-                       if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "")) {
-                               m->mothurOut("You must enter either a listfile, rabundfile, sabundfile or a sharedfile with the read.otu command. "); m->mothurOutEndLine(); abort = true; 
+                       if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "") && (relAbundfile == "")) {
+                               m->mothurOut("You must enter either a listfile, rabundfile, sabundfile, relabund or a sharedfile with the read.otu command. "); m->mothurOutEndLine(); abort = true; 
                        }
                
                        //check for optional parameter and set defaults
@@ -194,7 +207,7 @@ void ReadOtuCommand::help(){
        try {
                m->mothurOut("The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, \n");
                m->mothurOut("collect.shared, rarefaction.shared, summary.shared heatmap.bin, heatmap.sim or venn command.   Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command \n");
-               m->mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, ordergroup, label and groups.\n");
+               m->mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, relabund, group, order, ordergroup, label and groups.\n");
                m->mothurOut("The read.otu command can be used in two ways.  The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single.\n");
                m->mothurOut("For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels).\n");
                m->mothurOut("The list, rabund or sabund parameter is required, but you may only use one of them.\n");
index b5e32d897b2dd824d81b257fff12517a6a90bc6f..bf9ccc00d0e7151ea4dce787b42265f092416bbd 100644 (file)
@@ -29,7 +29,7 @@ private:
        InputData* input;
        Command* shared;
        GroupMap* groupMap;
-       string filename, listfile, orderfile, sharedfile, label, groupfile, sabundfile, rabundfile, format, groups, outputDir, ordergroupfile;
+       string filename, listfile, orderfile, sharedfile, label, groupfile, sabundfile, rabundfile, format, groups, outputDir, ordergroupfile, relAbundfile;
        vector<string> Groups;
 
        bool abort, allLines;
index b2fdaff640557e7f9b840748db2afbde438bf5e5..fb97c0a4562ccf3bf887aae0ceafb2f529be65fa 100644 (file)
@@ -290,50 +290,22 @@ int ScreenSeqsCommand::execute(){
                        MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
                                        
 #else
-                                       
+                       vector<unsigned long int> positions = divideFile(fastafile, processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+                                               
        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                        if(processors == 1){
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               getNumSeqs(inFASTA, numFastaSeqs);
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numFastaSeqs));
-                               
-                               driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+                               numFastaSeqs = driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
                                
                                if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
                                
                        }else{
-                               vector<unsigned long int> positions;
                                processIDS.resize(0);
                                
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               
-                               string input;
-                               while(!inFASTA.eof()){
-                                       input = getline(inFASTA);
-                                       if (input.length() != 0) {
-                                               if(input[0] == '>'){    unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
-                                       }
-                               }
-                               inFASTA.close();
-                               
-                               numFastaSeqs = positions.size();
-                       
-                               int numSeqsPerProcessor = numFastaSeqs / processors;
-                                       
-                               for (int i = 0; i < processors; i++) {
-                                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                                       if(i == processors - 1){
-                                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                                       }
-                                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-               
-                               }
-                               
-                               createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames); 
+                               numFastaSeqs = createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames); 
                                
                                rename((goodSeqFile + toString(processIDS[0]) + ".temp").c_str(), goodSeqFile.c_str());
                                rename((badSeqFile + toString(processIDS[0]) + ".temp").c_str(), badSeqFile.c_str());
@@ -368,14 +340,7 @@ int ScreenSeqsCommand::execute(){
                                }
                        }
        #else
-                       ifstream inFASTA;
-                       openInputFile(fastafile, inFASTA);
-                       getNumSeqs(inFASTA, numFastaSeqs);
-                       inFASTA.close();
-                       
-                       lines.push_back(new linePair(0, numFastaSeqs));
-                       
-                       driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+                       numFastaSeqs = driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
                        
                        if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
                        
@@ -692,7 +657,7 @@ int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
 }
 //**********************************************************************************************************************
 
-int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badFName, string badAccnosFName, string filename, set<string>& badSeqNames){
        try {
                ofstream goodFile;
                openOutputFile(goodFName, goodFile);
@@ -706,13 +671,16 @@ int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName,
                ifstream inFASTA;
                openInputFile(filename, inFASTA);
 
-               inFASTA.seekg(line->start);
+               inFASTA.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
        
-               for(int i=0;i<line->numSeqs;i++){
+               while (!done) {
                
                        if (m->control_pressed) {  return 0; }
                        
-                       Sequence currSeq(inFASTA);
+                       Sequence currSeq(inFASTA); gobble(inFASTA);
                        if (currSeq.getName() != "") {
                                bool goodSeq = 1;               //      innocent until proven guilty
                                if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
@@ -730,9 +698,17 @@ int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName,
                                        badAccnosFile << currSeq.getName() << endl;
                                        badSeqNames.insert(currSeq.getName());
                                }
+                       count++;
                        }
-                       gobble(inFASTA);
+                       
+                       unsigned long int pos = inFASTA.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
+                       //report progress
+                       if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                }
+               //report progress
+               if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
                
                        
                goodFile.close();
@@ -740,7 +716,7 @@ int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName,
                badFile.close();
                badAccnosFile.close();
                
-               return 1;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "ScreenSeqsCommand", "driver");
@@ -838,7 +814,7 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badFileName,
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               int exitCommand = 1;
+               int num = 0;
                
                //loop through and create all the processes you want
                while (process != processors) {
@@ -848,7 +824,15 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badFileName,
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               exitCommand = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+                               num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               out << num << endl;
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -859,7 +843,15 @@ int ScreenSeqsCommand::createProcesses(string goodFileName, string badFileName,
                        wait(&temp);
                }
                
-               return exitCommand;
+               for (int i = 0; i < processIDS.size(); i++) {
+                       ifstream in;
+                       string tempFile =  toString(processIDS[i]) + ".temp";
+                       openInputFile(tempFile, in);
+                       if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+                       in.close(); remove(tempFile.c_str());
+               }
+               
+               return num;
 #endif         
        }
        catch(exception& e) {
index 071724f79cb45c69bb4c4237c072ddc462188439..8f62ae48af466148c56b2b7b53aef7240cb95761 100644 (file)
@@ -24,9 +24,10 @@ private:
 
        struct linePair {
                unsigned long int start;
-               int numSeqs;
-               linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<int> processIDS;   //processid
        vector<linePair*> lines;
 
index 5009ff5c2eeb87e1cbe49fdd0f240355bfcaaf74..7362c1b6cfe9e8b11c03ff4c69d3e32b83531942 100644 (file)
@@ -224,19 +224,17 @@ int SeqSummaryCommand::execute(){
                                
                                MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
 #else
+                       vector<unsigned long int> positions = divideFile(fastafile, processors);
+                               
+                       for (int i = 0; i < (positions.size()-1); i++) {
+                               lines.push_back(new linePair(positions[i], positions[(i+1)]));
+                       }       
+
                #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                                if(processors == 1){
-                                       ifstream inFASTA;
-                                       openInputFile(fastafile, inFASTA);
-                                       getNumSeqs(inFASTA, numSeqs);
-                                       inFASTA.close();        
-                                               
-                                       lines.push_back(new linePair(0, numSeqs));
-                                       
-                                       driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
+                                       numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
                                }else{
-                                       numSeqs = setLines(fastafile);                                  
-                                       createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
+                                       numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile); 
                                        
                                        rename((summaryFile + toString(processIDS[0]) + ".temp").c_str(), summaryFile.c_str());
                                        //append files
@@ -248,14 +246,7 @@ int SeqSummaryCommand::execute(){
                                
                                if (m->control_pressed) {  return 0; }
                #else
-                               ifstream inFASTA;
-                               openInputFile(fastafile, inFASTA);
-                               getNumSeqs(inFASTA, numSeqs);
-                               inFASTA.close();
-                               
-                               lines.push_back(new linePair(0, numSeqs));
-                               
-                               driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
+                               numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
                                if (m->control_pressed) {  return 0; }
                #endif
 #endif
@@ -313,27 +304,30 @@ int SeqSummaryCommand::execute(){
        }
 }
 /**************************************************************************************/
-int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile, linePair* line) {     
+int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile, linePair* filePos) {  
        try {
                
                ofstream outSummary;
                openOutputFile(sumFile, outSummary);
                
                //print header if you are process 0
-               if (line->start == 0) {
+               if (filePos->start == 0) {
                        outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;   
                }
                                
                ifstream in;
                openInputFile(filename, in);
                                
-               in.seekg(line->start);
-               
-               for(int i=0;i<line->num;i++){
+               in.seekg(filePos->start);
+
+               bool done = false;
+               int count = 0;
+       
+               while (!done) {
                                
                        if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
                                        
-                       Sequence current(in);
+                       Sequence current(in); gobble(in);
        
                        if (current.getName() != "") {
                                startPosition.push_back(current.getStartPos());
@@ -346,12 +340,21 @@ int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<in
                                outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
                                outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
                                outSummary << current.getLongHomoPolymer() << endl;
+                               count++;
                        }
-                       gobble(in);
+                       
+                       unsigned long int pos = in.tellg();
+                       if ((pos == -1) || (pos >= filePos->end)) { break; }
+                       
+                       //report progress
+                       if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
                }
+               //report progress
+               if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
+               
                in.close();
                
-               return 0;
+               return count;
        }
        catch(exception& e) {
                m->errorOut(e, "SeqSummaryCommand", "driverCreateSummary");
@@ -418,18 +421,33 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
        try {
 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
                int process = 0;
-               int exitCommand = 1;
+               int num = 0;
                processIDS.clear();
                
                //loop through and create all the processes you want
                while (process != processors) {
-                       int pid = vfork();
+                       int pid = fork();
                        
                        if (pid > 0) {
                                processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
                                process++;
                        }else if (pid == 0){
-                               driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+                               num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+                               
+                               //pass numSeqs to parent
+                               ofstream out;
+                               string tempFile = toString(getpid()) + ".temp";
+                               openOutputFile(tempFile, out);
+                               
+                               out << num << endl;
+                               for (int k = 0; k < startPosition.size(); k++)          {               out << startPosition[k] << '\t'; }  out << endl;
+                               for (int k = 0; k < endPosition.size(); k++)            {               out << endPosition[k] << '\t'; }  out << endl;
+                               for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
+                               for (int k = 0; k < ambigBases.size(); k++)                     {               out << ambigBases[k] << '\t'; }  out << endl;
+                               for (int k = 0; k < longHomoPolymer.size(); k++)        {               out << longHomoPolymer[k] << '\t'; }  out << endl;
+                               
+                               out.close();
+                               
                                exit(0);
                        }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
                }
@@ -440,65 +458,29 @@ int SeqSummaryCommand::createProcessesCreateSummary(vector<int>& startPosition,
                        wait(&temp);
                }
                
-               return exitCommand;
-#endif         
-       }
-       catch(exception& e) {
-               m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
-               exit(1);
-       }
-}
-/**************************************************************************************************/
-
-int SeqSummaryCommand::setLines(string filename) {
-       try {
-               
-               vector<unsigned long int> positions;
-               
-               ifstream inFASTA;
-               openInputFile(filename, inFASTA);
+               //parent reads in and combine Filter info
+               for (int i = 0; i < processIDS.size(); i++) {
+                       string tempFilename = toString(processIDS[i]) + ".temp";
+                       ifstream in;
+                       openInputFile(tempFilename, in);
                        
-               string input;
-               while(!inFASTA.eof()){  
-                       input = getline(inFASTA);
-
-                       if (input.length() != 0) {
-                               if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);    }
-                       }
-               }
-               inFASTA.close();
-               
-               int numFastaSeqs = positions.size();
-       
-               FILE * pFile;
-               unsigned long int size;
-               
-               //get num bytes in file
-               pFile = fopen (filename.c_str(),"rb");
-               if (pFile==NULL) perror ("Error opening file");
-               else{
-                       fseek (pFile, 0, SEEK_END);
-                       size=ftell (pFile);
-                       fclose (pFile);
-               }
-               
-               int numSeqsPerProcessor = numFastaSeqs / processors;
-               
-               for (int i = 0; i < processors; i++) {
-
-                       unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
-                       if(i == processors - 1){
-                               numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
-                       }else{  
-                               unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
-                       }
-                       lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+                       int temp, tempNum;
+                       in >> tempNum; gobble(in); num += tempNum;
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; startPosition.push_back(temp);              }               gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; endPosition.push_back(temp);                }               gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; ambigBases.push_back(temp);                 }               gobble(in);
+                       for (int k = 0; k < tempNum; k++)                       {               in >> temp; longHomoPolymer.push_back(temp);    }               gobble(in);
+                               
+                       in.close();
+                       remove(tempFilename.c_str());
                }
                
-               return numFastaSeqs;
+               return num;
+#endif         
        }
        catch(exception& e) {
-               m->errorOut(e, "SeqSummaryCommand", "setLines");
+               m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
                exit(1);
        }
 }
index a62572673c3d7497007ab959157fd41dfb6b124b..6abf06a6fcf11bba1a714c6b3efdfd2be37b7285 100644 (file)
@@ -27,15 +27,15 @@ private:
        
        struct linePair {
                unsigned long int start;
-               int num;
-               linePair(unsigned long int i, long int j) : start(i), num(j) {}
+               unsigned long int end;
+               linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
        };
+
        vector<linePair*> lines;
        vector<int> processIDS;
        
        int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
        int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);       
-       int setLines(string);
 
        #ifdef USE_MPI
        int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long int>&);