int startIndex = pid * numSeqsPerProcessor;
if(pid == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor; }
-
//align your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIAlign, outMPIReport, outMPIAccnos, MPIPos);
}
#else
-
+ vector<unsigned long int> positions = divideFile(candidateFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(candidateFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
+ numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
- driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
-
- if (m->control_pressed) {
- remove(accnosFileName.c_str());
- remove(alignFileName.c_str());
- remove(reportFileName.c_str());
- return 0;
- }
+ if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
//delete accnos file if it's blank, else report to user
if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
}else{ m->mothurOut(" If the reverse compliment proved to be better it was reported."); }
m->mothurOutEndLine();
}
- }
- else{
- vector<unsigned long int> positions;
+ }else{
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(candidateFileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numFastaSeqs = positions.size();
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+ numFastaSeqs = createProcesses(alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
rename((alignFileName + toString(processIDS[0]) + ".temp").c_str(), alignFileName.c_str());
rename((reportFileName + toString(processIDS[0]) + ".temp").c_str(), reportFileName.c_str());
m->mothurOutEndLine();
}else{ hasAccnos = false; }
- if (m->control_pressed) {
- remove(accnosFileName.c_str());
- remove(alignFileName.c_str());
- remove(reportFileName.c_str());
- return 0;
- }
+ if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
}
#else
- ifstream inFASTA;
- openInputFile(candidateFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
+ numFastaSeqs = driver(lines[0], alignFileName, reportFileName, accnosFileName, candidateFileNames[s]);
- if (m->control_pressed) {
- remove(accnosFileName.c_str());
- remove(alignFileName.c_str());
- remove(reportFileName.c_str());
- return 0;
- }
+ if (m->control_pressed) { remove(accnosFileName.c_str()); remove(alignFileName.c_str()); remove(reportFileName.c_str()); return 0; }
//delete accnos file if it's blank, else report to user
if (isBlank(accnosFileName)) { remove(accnosFileName.c_str()); hasAccnos = false; }
//**********************************************************************************************************************
-int AlignCommand::driver(linePair* line, string alignFName, string reportFName, string accnosFName, string filename){
+int AlignCommand::driver(linePair* filePos, string alignFName, string reportFName, string accnosFName, string filename){
try {
ofstream alignmentFile;
openOutputFile(alignFName, alignmentFile);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
- for(int i=0;i<line->numSeqs;i++){
+ while (!done) {
if (m->control_pressed) { return 0; }
report.print();
delete nast;
if (needToDeleteCopy) { delete copy; }
+
+ count++;
}
delete candidateSeq;
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+
}
//report progress
- if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
alignmentFile.close();
inFASTA.close();
accnosFile.close();
- return 1;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "AlignCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- int exitCommand = 1;
+ int num = 0;
// processIDS.resize(0);
//loop through and create all the processes you want
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- exitCommand = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], alignFileName + toString(getpid()) + ".temp", reportFileName + toString(getpid()) + ".temp", accnosFName + toString(getpid()) + ".temp", filename);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return exitCommand;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
exit(1);
}
}
-
/**************************************************************************************************/
void AlignCommand::appendAlignFiles(string temp, string filename) {
private:
// A contiguous slice of an input file handed to a single driver() call.
//   start - stream offset the driver seeks to before it begins reading.
//   end   - stream offset at which the driver stops reading; it takes the
//           place of the earlier numSeqs field, which held a sequence count
//           used as the driver's loop bound.
// Instances are built from consecutive entries of the divideFile() result.
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end;
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
};
vector<int> processIDS; //processid
vector<linePair*> lines;
void appendReportFiles(string, string);
#ifdef USE_MPI
- int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
+ int driverMPI(MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
string candidateFileName, templateFileName, distanceFileName, search, align, outputDir;
outHeader.close();
+ vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numSeqs));
+
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
-
- createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
}
#else
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
#endif
}
//**********************************************************************************************************************
-int ChimeraCcodeCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
try {
ofstream out;
openOutputFile(outputFName, out);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
-
- for(int i=0;i<line->numSeqs;i++){
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { return 1; }
//print results
chimera->print(out, out2);
}
+ count++;
}
delete candidateSeq;
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
//report progress
- if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
out.close();
out2.close();
inFASTA.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ChimeraCcodeCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- // processIDS.resize(0);
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return 0;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
private:
-
// A contiguous slice of an input file handed to a single driver() call.
//   start - stream offset the driver seeks to before it begins reading.
//   end   - stream offset at which the driver stops reading; it takes the
//           place of the earlier numSeqs field, which held a sequence count
//           used as the driver's loop bound.
// Instances are built from consecutive entries of the divideFile() result.
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end;
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
};
vector<int> processIDS; //processid
vector<linePair*> lines;
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
+ vector<unsigned long int> positions = divideFile(fastaFileNames[i], processors);
+
+ for (int s = 0; s < (positions.size()-1); s++) {
+ lines.push_back(new linePair(positions[s], positions[(s+1)]));
+ }
+
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastaFileNames[i], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numSeqs));
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
- driver(lines[0], outputFileName, fastaFileNames[i]);
-
- if (m->control_pressed) {
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); delete chimera; return 0; }
}else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileNames[i], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int j = 0; j < processors; j++) {
- unsigned long int startPos = positions[ j * numSeqsPerProcessor ];
- if(j == processors - 1){
- numSeqsPerProcessor = numSeqs - j * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
-
- createProcesses(outputFileName, fastaFileNames[i]);
+ numSeqs = createProcesses(outputFileName, fastaFileNames[i]);
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
}
#else
- ifstream inFASTA;
- openInputFile(fastaFileNames[i], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastaFileNames[i]);
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
- if (m->control_pressed) {
- for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
#endif
#endif
delete chimera;
}
//**********************************************************************************************************************
-int ChimeraCheckCommand::driver(linePair* line, string outputFName, string filename){
+int ChimeraCheckCommand::driver(linePair* filePos, string outputFName, string filename){
try {
ofstream out;
openOutputFile(outputFName, out);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
-
- for(int i=0;i<line->numSeqs;i++){
-
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
+
if (m->control_pressed) { return 1; }
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
}
delete candidateSeq;
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
//report progress
- if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
out.close();
inFASTA.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ChimeraCheckCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- // processIDS.resize(0);
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return 0;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
// A contiguous slice of an input file handed to a single driver() call.
//   start - stream offset the driver seeks to before it begins reading.
//   end   - stream offset at which the driver stops reading; it takes the
//           place of the earlier numSeqs field, which held a sequence count
//           used as the driver's loop bound.
// Instances are built from consecutive entries of the divideFile() result.
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end;
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
};
+
vector<int> processIDS; //processid
vector<linePair*> lines;
MPI_File_close(&outMPIAccnos);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
-
+ vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
}
#else
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
#endif
#endif
}
//**********************************************************************************************************************
-int ChimeraPintailCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
try {
ofstream out;
openOutputFile(outputFName, out);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
-
- for(int i=0;i<line->numSeqs;i++){
-
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
+
if (m->control_pressed) { return 1; }
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
//print results
chimera->print(out, out2);
}
+ count++;
}
delete candidateSeq;
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
//report progress
- if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
out.close();
out2.close();
inFASTA.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ChimeraPintailCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- // processIDS.resize(0);
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return 0;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
// A contiguous slice of an input file handed to a single driver() call.
//   start - stream offset the driver seeks to before it begins reading.
//   end   - stream offset at which the driver stops reading; it takes the
//           place of the earlier numSeqs field, which held a sequence count
//           used as the driver's loop bound.
// Instances are built from consecutive entries of the divideFile() result.
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end;
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
};
+
vector<int> processIDS; //processid
vector<linePair*> lines;
chimera->printHeader(outHeader);
outHeader.close();
+ vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
//break up file
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- lines.push_back(new linePair(0, numSeqs));
-
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numSeqs = positions.size();
-
- int numSeqsPerProcessor = numSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
+ numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
}
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
-
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}
#else
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- lines.push_back(new linePair(0, numSeqs));
+ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
-
- if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(tempHeader.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }
- for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
- delete chimera;
- return 0;
- }
+ if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
#endif
}
//**********************************************************************************************************************
-int ChimeraSlayerCommand::driver(linePair* line, string outputFName, string filename, string accnos){
+int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
try {
ofstream out;
openOutputFile(outputFName, out);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
-
- for(int i=0;i<line->numSeqs;i++){
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { return 1; }
//print results
chimera->print(out, out2);
}
+ count++;
}
delete candidateSeq;
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
//report progress
- if((line->numSeqs) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(line->numSeqs)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
out.close();
out2.close();
inFASTA.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ChimeraSlayerCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- // processIDS.resize(0);
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+ num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return 0;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end; //offset bounding this chunk of the input file; the drivers in this change seekg(start) and stop once tellg() >= end
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} //i is stored as start, j as end
};
+
vector<int> processIDS; //processid
vector<linePair*> lines;
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
+
+ vector<unsigned long int> positions = divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
}
else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numFastaSeqs = positions.size();
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
- createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
rename((newTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), newTaxonomyFile.c_str());
rename((tempTaxonomyFile + toString(processIDS[0]) + ".temp").c_str(), tempTaxonomyFile.c_str());
}
#else
- ifstream inFASTA;
- openInputFile(fastaFileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
#endif
#endif
/**************************************************************************************************/
-void ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
+int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- // processIDS.resize(0);
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
int temp = processIDS[i];
wait(&temp);
}
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
//**********************************************************************************************************************
-int ClassifySeqsCommand::driver(linePair* line, string taxFName, string tempTFName, string filename){
+int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
try {
ofstream outTax;
openOutputFile(taxFName, outTax);
ifstream inFASTA;
openInputFile(filename, inFASTA);
-
- inFASTA.seekg(line->start);
string taxonomy;
- for(int i=0;i<line->numSeqs;i++){
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { return 0; }
Sequence* candidateSeq = new Sequence(inFASTA); gobble(inFASTA);
outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
}
- }
+ count++;
+ }
delete candidateSeq;
- if((i+1) % 100 == 0){
- m->mothurOut("Classifying sequence " + toString(i+1)); m->mothurOutEndLine();
- }
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
+ //report progress
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
-
+ //report progress
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+
inFASTA.close();
outTax.close();
outTaxSimple.close();
- return 1;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ClassifySeqsCommand", "driver");
private:
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end; //offset bounding this chunk of the fasta file; driver() seeks to start and breaks once tellg() >= end
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} //built from consecutive divideFile() positions: i is stored as start, j as end
};
+
vector<int> processIDS; //processid
vector<linePair*> lines;
vector<string> fastaFileNames;
int driver(linePair*, string, string, string);
void appendTaxFiles(string, string);
- void createProcesses(string, string, string);
+ int createProcesses(string, string, string);
string addUnclassifieds(string, int);
int MPIReadNamesFile(string);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
+ vector<unsigned long int> positions = divideFile(fastafileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- int numFastaSeqs;
- openInputFile(fastafileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
+ int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
numSeqs += numFastaSeqs;
-
- driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
}else{
- setLines(fastafileNames[s]);
- createProcessesRunFilter(filter, fastafileNames[s]);
+ int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s]);
+ numSeqs += numFastaSeqs;
rename((fastafileNames[s] + toString(processIDS[0]) + ".temp").c_str(), filteredFasta.c_str());
if (m->control_pressed) { return 1; }
#else
- ifstream inFASTA;
- int numFastaSeqs;
- openInputFile(fastafileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
+ numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
numSeqs += numFastaSeqs;
-
- driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
if (m->control_pressed) { return 1; }
#endif
}
#endif
/**************************************************************************************/
-int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* line) {
+int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* filePos) {
try {
ofstream out;
openOutputFile(outputFilename, out);
ifstream in;
openInputFile(inputFilename, in);
- in.seekg(line->start);
-
- for(int i=0;i<line->num;i++){
+ in.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { in.close(); out.close(); return 0; }
- Sequence seq(in);
+ Sequence seq(in); gobble(in);
if (seq.getName() != "") {
string align = seq.getAligned();
string filterSeq = "";
}
out << '>' << seq.getName() << endl << filterSeq << endl;
- }
- gobble(in);
-
+ count++;
+ }
+
+ unsigned long int pos = in.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
}
-
//report progress
- if((line->num) % 100 != 0){ m->mothurOut(toString(line->num)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+
out.close();
in.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- int exitCommand = 1;
+ int num = 0;
processIDS.clear();
//loop through and create all the processes you want
process++;
}else if (pid == 0){
string filteredFasta = filename + toString(getpid()) + ".temp";
- driverRunFilter(F, filteredFasta, filename, lines[process]);
+ num = driverRunFilter(F, filteredFasta, filename, lines[process]);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
for (int i=0;i<processors;i++) {
int temp = processIDS[i];
wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
}
+
- return exitCommand;
+ return num;
#endif
}
catch(exception& e) {
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
+ vector<unsigned long int> positions = divideFile(fastafileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- int numFastaSeqs;
- openInputFile(fastafileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
+ int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
numSeqs += numFastaSeqs;
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driverCreateFilter(F, fastafileNames[s], lines[0]);
}else{
- setLines(fastafileNames[s]);
- createProcessesCreateFilter(F, fastafileNames[s]);
+ int numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]);
+ numSeqs += numFastaSeqs;
}
if (m->control_pressed) { return filterString; }
#else
- ifstream inFASTA;
- int numFastaSeqs;
- openInputFile(fastafileNames[s], inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
+ numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
numSeqs += numFastaSeqs;
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driverCreateFilter(F, fastafileNames[s], lines[0]);
if (m->control_pressed) { return filterString; }
#endif
#endif
}
}
/**************************************************************************************/
-int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) {
+int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* filePos) {
try {
ifstream in;
openInputFile(filename, in);
- in.seekg(line->start);
-
- for(int i=0;i<line->num;i++){
+ in.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { in.close(); return 1; }
- Sequence seq(in);
+ Sequence seq(in); gobble(in);
if (seq.getName() != "") {
if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
if(trump != '*'){ F.doTrump(seq); }
if(isTrue(vertical) || soft != 0){ F.getFreqs(seq); }
cout.flush();
+ count++;
}
+ unsigned long int pos = in.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
//report progress
- if((i+1) % 100 == 0){ m->mothurOut(toString(i+1)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
}
-
//report progress
- if((line->num) % 100 != 0){ m->mothurOut(toString(line->num)); m->mothurOutEndLine(); }
-
+ if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
in.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- int exitCommand = 1;
+ int num = 0;
processIDS.clear();
//loop through and create all the processes you want
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driverCreateFilter(F, filename, lines[process]);
+ num = driverCreateFilter(F, filename, lines[process]);
//write out filter counts to file
filename += toString(getpid()) + "filterValues.temp";
ofstream out;
openOutputFile(filename, out);
+ out << num << endl;
for (int k = 0; k < alignmentLength; k++) { out << F.a[k] << '\t'; } out << endl;
for (int k = 0; k < alignmentLength; k++) { out << F.t[k] << '\t'; } out << endl;
for (int k = 0; k < alignmentLength; k++) { out << F.g[k] << '\t'; } out << endl;
ifstream in;
openInputFile(tempFilename, in);
- int temp;
+ int temp, tempNum;
+ in >> tempNum; gobble(in); num += tempNum;
for (int k = 0; k < alignmentLength; k++) { in >> temp; F.a[k] += temp; } gobble(in);
for (int k = 0; k < alignmentLength; k++) { in >> temp; F.t[k] += temp; } gobble(in);
for (int k = 0; k < alignmentLength; k++) { in >> temp; F.g[k] += temp; } gobble(in);
remove(tempFilename.c_str());
}
- return exitCommand;
+ return num;
#endif
}
catch(exception& e) {
exit(1);
}
}
-/**************************************************************************************************/
-
-int FilterSeqsCommand::setLines(string filename) {
- try {
-
- vector<unsigned long int> positions;
- bufferSizes.clear();
-
- ifstream inFASTA;
- openInputFile(filename, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
-
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- int numFastaSeqs = positions.size();
-
- FILE * pFile;
- unsigned long int size;
-
- //get num bytes in file
- pFile = fopen (filename.c_str(),"rb");
- if (pFile==NULL) perror ("Error opening file");
- else{
- fseek (pFile, 0, SEEK_END);
- size=ftell (pFile);
- fclose (pFile);
- }
-
- numSeqs += numFastaSeqs;
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
-
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- bufferSizes.push_back(size - startPos);
- }else{
- unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
- bufferSizes.push_back(myEnd-startPos);
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
- }
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "FilterSeqsCommand", "setLines");
- exit(1);
- }
-}
/**************************************************************************************/
private:\r
struct linePair {\r
unsigned long int start;\r
- int num;\r
- linePair(unsigned long int i, long int j) : start(i), num(j) {}\r
+ unsigned long int end;\r
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}\r
};\r
+\r
vector<linePair*> lines;\r
vector<int> processIDS;\r
\r
int driverMPIRun(int, int, MPI_File&, MPI_File&, vector<unsigned long int>&);\r
int MPICreateFilter(int, int, Filters&, MPI_File&, vector<unsigned long int>&); \r
#endif\r
- int setLines(string);\r
- \r
\r
};\r
\r
string GlobalData::getOrderGroupFile() { return ordergroup; }
string GlobalData::getTreeFile() { return treefile; }
string GlobalData::getSharedFile() { return sharedfile; }
+string GlobalData::getRelAbundFile() { return relAbundfile; }
string GlobalData::getFormat() { return format; }
void GlobalData::setListFile(string file) { listfile = file; inputFileName = file; }
void GlobalData::setColumnFile(string file) { columnfile = file; inputFileName = file; }
void GlobalData::setGroupFile(string file) { groupfile = file; }
void GlobalData::setSharedFile(string file) { sharedfile = file; inputFileName = file; }
+void GlobalData::setRelAbundFile(string file) { relAbundfile = file; inputFileName = file; } //also overwrites inputFileName, as setSharedFile/setListFile/setColumnFile do
void GlobalData::setNameFile(string file) { namefile = file; }
void GlobalData::setOrderFile(string file) { orderfile = file; }
void GlobalData::setOrderGroupFile(string file) { ordergroup = file; }
// fastafile = ""; //do we need this?
treefile = "";
sharedfile = "";
+ relAbundfile = "";
format = "";
}
string getOrderGroupFile();
string getTreeFile();
string getSharedFile();
+ string getRelAbundFile();
string getFormat(); //do we need this?
void setRabundFile(string);
void setSabundFile(string);
void setSharedFile(string);
+ void setRelAbundFile(string);
void setOrderFile(string file);
void setOrderGroupFile(string file);
void setFormat(string); //do we need this?
private:
MothurOut* m;
- string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile, ordergroup;
+ string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, treefile, sharedfile, format, distfile, ordergroup, relAbundfile;
static GlobalData* _uniqueInstance;
GlobalData( const GlobalData& ); // Disable copy constructor
return positions;
}
/**************************************************************************************************/
+
+//Splits a fasta file into byte ranges, one per process, so each range can be read independently.
+//  filename - file to divide.
+//  proc     - in: requested number of processes (values < 1 are treated as 1).
+//             out: number of ranges actually produced (returned vector size - 1).
+//Returns strictly increasing byte offsets; the first is 0 and the last is the file size, and
+//range k is [result[k], result[k+1]). Interior offsets point at a '>' character.
+//If the file cannot be opened or measured its size is taken as 0, giving {0, 0} with proc = 1.
+//NOTE(review): any '>' byte is accepted as a break, including one inside a header line -
+//confirm the inputs never contain '>' anywhere but at the start of a record.
+inline vector<unsigned long int> divideFile(string filename, int& proc) {
+	try{
+
+		vector<unsigned long int> filePos;
+		filePos.push_back(0);
+
+		//get num bytes in file; size stays 0 (never uninitialized) if the file can't be opened or measured
+		unsigned long int size = 0;
+		FILE * pFile = fopen (filename.c_str(),"rb");
+		if (pFile==NULL) perror ("Error opening file");
+		else{
+			if (fseek (pFile, 0, SEEK_END) == 0) {
+				long int numBytes = ftell (pFile);   //ftell returns -1 on error
+				if (numBytes > 0) { size = numBytes; }
+			}
+			fclose (pFile);
+		}
+
+		//protect the division below from a zero or negative process count
+		if (proc < 1) { proc = 1; }
+
+		//estimate file breaks
+		unsigned long int chunkSize = size / proc;
+
+		//file too small to divide by processors
+		if (chunkSize == 0)  {  proc = 1;	filePos.push_back(size); return filePos;	}
+
+		//for each process seekg to closest file break and search for next '>' char. make that the filebreak
+		for (int i = 0; i < proc; i++) {
+			unsigned long int spot = (i+1) * chunkSize;
+
+			ifstream in;
+			openInputFile(filename, in);
+			in.seekg(spot);
+
+			//look for next '>'. track success explicitly: a '>' sitting exactly at spot is a valid break,
+			//so comparing newSpot with spot cannot tell "found" from "not found".
+			bool found = false;
+			unsigned long int newSpot = spot;
+			while (true) {
+				int c = in.get();
+				if (!in) { break; }   //end of file or read failure - stop instead of spinning on a failed stream
+				if (c == '>') {   in.putback(c); newSpot = in.tellg(); found = true; break;  }
+			}
+			in.close();
+
+			//there was not another sequence before the end of the file
+			if (!found) { break; }
+			filePos.push_back(newSpot);
+		}
+
+		//save end pos
+		filePos.push_back(size);
+
+		//sanity check filePos: drop any offset that does not advance past the one before it
+		vector<unsigned long int>::size_type k = 0;
+		while ((k + 1) < filePos.size()) {
+			if (filePos[(k+1)] <= filePos[k]) {  filePos.erase(filePos.begin()+(k+1));  }
+			else { k++; }
+		}
+
+		//tell the caller how many chunks it really got
+		proc = (filePos.size() - 1);
+
+		return filePos;
+	}
+	catch(exception& e) {
+		cout << "Standard Error: " << e.what() << " has occurred in the mothur.h function divideFile. Please contact Pat Schloss at mothur.bugs@gmail.com." << "\n";
+		exit(1);
+	}
+}
+/**************************************************************************************************/
inline bool checkReleaseVersion(ifstream& file, string version) {
try {
else {
//valid paramters for this command
- string Array[] = {"list","order","shared", "label","group","sabund", "rabund","groups","ordergroup","outputdir","inputdir"};
+ string Array[] = {"list","order","shared","relabund","label","group","sabund", "rabund","groups","ordergroup","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["ordergroup"] = inputDir + it->second; }
}
-
+
+ it = parameters.find("relabund");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["relabund"] = inputDir + it->second; }
+ }
}
else if (sharedfile == "not found") { sharedfile = ""; }
else { globaldata->setSharedFile(sharedfile); globaldata->setFormat("sharedfile"); }
+ relAbundfile = validParameter.validFile(parameters, "relabund", true);
+ if (relAbundfile == "not open") { abort = true; }
+ else if (relAbundfile == "not found") { relAbundfile = ""; }
+ else { globaldata->setRelAbundFile(relAbundfile); globaldata->setFormat("relabund"); }
+
+
groupfile = validParameter.validFile(parameters, "group", true);
if (groupfile == "not open") { abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
if ((listfile != "") && (groupfile != "")) { globaldata->setFormat("shared"); }
//you have not given a file
- if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "")) {
- m->mothurOut("You must enter either a listfile, rabundfile, sabundfile or a sharedfile with the read.otu command. "); m->mothurOutEndLine(); abort = true;
+ if ((listfile == "") && (sharedfile == "") && (rabundfile == "") && (sabundfile == "") && (relAbundfile == "")) {
+ m->mothurOut("You must enter either a listfile, rabundfile, sabundfile, relabund or a sharedfile with the read.otu command. "); m->mothurOutEndLine(); abort = true;
}
//check for optional parameter and set defaults
try {
m->mothurOut("The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, \n");
m->mothurOut("collect.shared, rarefaction.shared, summary.shared heatmap.bin, heatmap.sim or venn command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command \n");
- m->mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, group, order, ordergroup, label and groups.\n");
+ m->mothurOut("or you may use your own. The read.otu command parameter options are list, rabund, sabund, shared, relabund, group, order, ordergroup, label and groups.\n");
m->mothurOut("The read.otu command can be used in two ways. The first is to read a list, rabund or sabund and run the collect.single, rarefaction.single or summary.single.\n");
m->mothurOut("For this use the read.otu command should be in the following format: read.otu(list=yourListFile, order=yourOrderFile, label=yourLabels).\n");
m->mothurOut("The list, rabund or sabund parameter is required, but you may only use one of them.\n");
InputData* input;
Command* shared;
GroupMap* groupMap;
- string filename, listfile, orderfile, sharedfile, label, groupfile, sabundfile, rabundfile, format, groups, outputDir, ordergroupfile;
+ string filename, listfile, orderfile, sharedfile, label, groupfile, sabundfile, rabundfile, format, groups, outputDir, ordergroupfile, relAbundfile;
vector<string> Groups;
bool abort, allLines;
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
-
+ vector<unsigned long int> positions = divideFile(fastafile, processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+ numFastaSeqs = driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
}else{
- vector<unsigned long int> positions;
processIDS.resize(0);
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
-
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- numFastaSeqs = positions.size();
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
-
- }
-
- createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+ numFastaSeqs = createProcesses(goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
rename((goodSeqFile + toString(processIDS[0]) + ".temp").c_str(), goodSeqFile.c_str());
rename((badSeqFile + toString(processIDS[0]) + ".temp").c_str(), badSeqFile.c_str());
}
}
#else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numFastaSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numFastaSeqs));
-
- driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
+ numFastaSeqs = driver(lines[0], goodSeqFile, badSeqFile, badAccnosFile, fastafile, badSeqNames);
if (m->control_pressed) { remove(goodSeqFile.c_str()); remove(badSeqFile.c_str()); return 0; }
}
//**********************************************************************************************************************
-int ScreenSeqsCommand::driver(linePair* line, string goodFName, string badFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badFName, string badAccnosFName, string filename, set<string>& badSeqNames){
try {
ofstream goodFile;
openOutputFile(goodFName, goodFile);
ifstream inFASTA;
openInputFile(filename, inFASTA);
- inFASTA.seekg(line->start);
+ inFASTA.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
- for(int i=0;i<line->numSeqs;i++){
+ while (!done) {
if (m->control_pressed) { return 0; }
- Sequence currSeq(inFASTA);
+ Sequence currSeq(inFASTA); gobble(inFASTA);
if (currSeq.getName() != "") {
bool goodSeq = 1; // innocent until proven guilty
if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
badAccnosFile << currSeq.getName() << endl;
badSeqNames.insert(currSeq.getName());
}
+ count++;
}
- gobble(inFASTA);
+
+ unsigned long int pos = inFASTA.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
+ //report progress
+ if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
}
+ //report progress
+ if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
goodFile.close();
badFile.close();
badAccnosFile.close();
- return 1;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "ScreenSeqsCommand", "driver");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- int exitCommand = 1;
+ int num = 0;
//loop through and create all the processes you want
while (process != processors) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- exitCommand = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+ num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return exitCommand;
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".temp";
+ openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
#endif
}
catch(exception& e) {
struct linePair {
unsigned long int start;
- int numSeqs;
- linePair(unsigned long int i, int j) : start(i), numSeqs(j) {}
+ unsigned long int end; //offset bounding this chunk of the input file; the drivers in this change seekg(start) and stop once tellg() >= end
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {} //i is stored as start, j as end
};
+
vector<int> processIDS; //processid
vector<linePair*> lines;
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
+ vector<unsigned long int> positions = divideFile(fastafile, processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
if(processors == 1){
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numSeqs));
-
- driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
+ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
}else{
- numSeqs = setLines(fastafile);
- createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile);
+ numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile);
rename((summaryFile + toString(processIDS[0]) + ".temp").c_str(), summaryFile.c_str());
//append files
if (m->control_pressed) { return 0; }
#else
- ifstream inFASTA;
- openInputFile(fastafile, inFASTA);
- getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
-
- lines.push_back(new linePair(0, numSeqs));
-
- driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
+ numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, summaryFile, lines[0]);
if (m->control_pressed) { return 0; }
#endif
#endif
}
}
/**************************************************************************************/
-int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile, linePair* line) {
+int SeqSummaryCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, string sumFile, linePair* filePos) {
try {
ofstream outSummary;
openOutputFile(sumFile, outSummary);
//print header if you are process 0
- if (line->start == 0) {
+ if (filePos->start == 0) {
outSummary << "seqname\tstart\tend\tnbases\tambigs\tpolymer" << endl;
}
ifstream in;
openInputFile(filename, in);
- in.seekg(line->start);
-
- for(int i=0;i<line->num;i++){
+ in.seekg(filePos->start);
+
+ bool done = false;
+ int count = 0;
+
+ while (!done) {
if (m->control_pressed) { in.close(); outSummary.close(); return 1; }
- Sequence current(in);
+ Sequence current(in); gobble(in);
if (current.getName() != "") {
startPosition.push_back(current.getStartPos());
outSummary << current.getStartPos() << '\t' << current.getEndPos() << '\t';
outSummary << current.getNumBases() << '\t' << current.getAmbigBases() << '\t';
outSummary << current.getLongHomoPolymer() << endl;
+ count++;
}
- gobble(in);
+
+ unsigned long int pos = in.tellg();
+ if ((pos == -1) || (pos >= filePos->end)) { break; }
+
+ //report progress
+ if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
}
+ //report progress
+ if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine(); }
+
in.close();
- return 0;
+ return count;
}
catch(exception& e) {
m->errorOut(e, "SeqSummaryCommand", "driverCreateSummary");
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
int process = 0;
- int exitCommand = 1;
+ int num = 0;
processIDS.clear();
//loop through and create all the processes you want
while (process != processors) {
- int pid = vfork();
+ int pid = fork();
if (pid > 0) {
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+ num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, sumFile + toString(getpid()) + ".temp", lines[process]);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".temp";
+ openOutputFile(tempFile, out);
+
+ out << num << endl;
+ for (int k = 0; k < startPosition.size(); k++) { out << startPosition[k] << '\t'; } out << endl;
+ for (int k = 0; k < endPosition.size(); k++) { out << endPosition[k] << '\t'; } out << endl;
+ for (int k = 0; k < seqLength.size(); k++) { out << seqLength[k] << '\t'; } out << endl;
+ for (int k = 0; k < ambigBases.size(); k++) { out << ambigBases[k] << '\t'; } out << endl;
+ for (int k = 0; k < longHomoPolymer.size(); k++) { out << longHomoPolymer[k] << '\t'; } out << endl;
+
+ out.close();
+
exit(0);
}else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
}
wait(&temp);
}
- return exitCommand;
-#endif
- }
- catch(exception& e) {
- m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
- exit(1);
- }
-}
-/**************************************************************************************************/
-
-int SeqSummaryCommand::setLines(string filename) {
- try {
-
- vector<unsigned long int> positions;
-
- ifstream inFASTA;
- openInputFile(filename, inFASTA);
+ //parent reads each child's temp file and combines the summary info
+ for (int i = 0; i < processIDS.size(); i++) {
+ string tempFilename = toString(processIDS[i]) + ".temp";
+ ifstream in;
+ openInputFile(tempFilename, in);
- string input;
- while(!inFASTA.eof()){
- input = getline(inFASTA);
-
- if (input.length() != 0) {
- if(input[0] == '>'){ unsigned long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1); }
- }
- }
- inFASTA.close();
-
- int numFastaSeqs = positions.size();
-
- FILE * pFile;
- unsigned long int size;
-
- //get num bytes in file
- pFile = fopen (filename.c_str(),"rb");
- if (pFile==NULL) perror ("Error opening file");
- else{
- fseek (pFile, 0, SEEK_END);
- size=ftell (pFile);
- fclose (pFile);
- }
-
- int numSeqsPerProcessor = numFastaSeqs / processors;
-
- for (int i = 0; i < processors; i++) {
-
- unsigned long int startPos = positions[ i * numSeqsPerProcessor ];
- if(i == processors - 1){
- numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
- }else{
- unsigned long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
- }
- lines.push_back(new linePair(startPos, numSeqsPerProcessor));
+ int temp, tempNum;
+ in >> tempNum; gobble(in); num += tempNum;
+ for (int k = 0; k < tempNum; k++) { in >> temp; startPosition.push_back(temp); } gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; endPosition.push_back(temp); } gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; seqLength.push_back(temp); } gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; ambigBases.push_back(temp); } gobble(in);
+ for (int k = 0; k < tempNum; k++) { in >> temp; longHomoPolymer.push_back(temp); } gobble(in);
+
+ in.close();
+ remove(tempFilename.c_str());
}
- return numFastaSeqs;
+ return num;
+#endif
}
catch(exception& e) {
- m->errorOut(e, "SeqSummaryCommand", "setLines");
+ m->errorOut(e, "SeqSummaryCommand", "createProcessesCreateSummary");
exit(1);
}
}
// linePair: describes the slice of an input file handed to one driver call.
//   start - offset the reader passes to seekg() before it begins reading.
//   end   - introduced by this change in place of the sequence count `num`;
//           the reader stops once tellg() reports a position at or beyond it.
struct linePair {
unsigned long int start;
- int num;
- linePair(unsigned long int i, long int j) : start(i), num(j) {}
+ unsigned long int end;
+ linePair(unsigned long int i, unsigned long int j) : start(i), end(j) {}
};
+
// One linePair per slice of the input file; lines[process] is handed to the
// driver that child number `process` runs, and lines[0] is used when only one
// driver call is made.
vector<linePair*> lines;
// pids returned by fork(), in creation order; each pid is also used to build
// the names of that child's ".temp" files.
vector<int> processIDS;
// On the fork-based path: runs one driver per child and returns the sum of the
// per-child sequence counts read back from the children's temp files.
int createProcessesCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string);
// Writes summary rows for the sequences in one linePair range and returns how
// many named sequences were processed (returns 1 early if control_pressed is set).
int driverCreateSummary(vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, string, string, linePair*);
- int setLines(string);
#ifdef USE_MPI
int MPICreateSummary(int, int, vector<int>&, vector<int>&, vector<int>&, vector<int>&, vector<int>&, MPI_File&, MPI_File&, vector<unsigned long int>&);