+
+ if (m->control_pressed) { remove(goodGroupFile.c_str()); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
+ exit(1);
+ }
+}
+
+//***************************************************************************************************************
+
+// Writes a filtered copy of the align report named by the alignreport member.
+//
+// The output file is outputDir + root name of alignreport + "good" + the
+// original extension, and is registered in outputNames and
+// outputTypes["alignreport"]. The header line is copied verbatim; every
+// following record whose first field is in badSeqNames is dropped, all other
+// records are written as name + '\t' + rest of the line.
+//
+// badSeqNames is taken by value on purpose: names are erased from the local
+// copy as they are matched, so whatever remains afterwards is a name that
+// never appeared in the report and is reported to the user.
+//
+// Returns 0 in every case. If m->control_pressed is set the partial output
+// file is removed before returning. Any std::exception is reported through
+// m->errorOut and the process exits with status 1.
+int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
+    try {
+        ifstream inputAlignReport;
+        m->openInputFile(alignreport, inputAlignReport);
+        string seqName;
+        set<string>::iterator it;
+
+        string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + "good" + m->getExtension(alignreport);
+        outputNames.push_back(goodAlignReportFile); outputTypes["alignreport"].push_back(goodAlignReportFile);
+        ofstream goodAlignReportOut; m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
+
+        // Copy the header line. get(char&) fails at end-of-file instead of
+        // handing back the EOF sentinel, so a truncated file no longer gets a
+        // stray 0xFF byte written into the output.
+        char headerChar;
+        while (inputAlignReport.get(headerChar)) {
+            goodAlignReportOut << headerChar;
+            if (headerChar == 10 || headerChar == 13){ break; }
+        }
+
+        while(!inputAlignReport.eof()){
+            if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); return 0; }
+
+            // No further record (e.g. header-only file): stop rather than
+            // emitting an empty "\t" line.
+            if (!(inputAlignReport >> seqName)) { break; }
+            it = badSeqNames.find(seqName);
+
+            // Collect the rest of the record, including its line terminator.
+            string line;
+            char c;
+            while (inputAlignReport.get(c)) {
+                line += c;
+                if (c == 10 || c == 13){ break; }
+            }
+
+            if(it != badSeqNames.end()){
+                badSeqNames.erase(it);   // matched: drop the record
+            }
+            else{
+                goodAlignReportOut << seqName << '\t' << line;
+            }
+            m->gobble(inputAlignReport);
+        }
+
+        if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); return 0; }
+
+        //we were unable to remove some of the bad sequences
+        if (badSeqNames.size() != 0) {
+            for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct.");
+                m->mothurOutEndLine();
+            }
+        }
+
+        inputAlignReport.close();
+        goodAlignReportOut.close();
+
+        if (m->control_pressed) { remove(goodAlignReportFile.c_str()); return 0; }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
+        exit(1);
+    }
+
+}
+//**********************************************************************************************************************
+
+// Screens the sequences read from `filename`, starting at filePos->start.
+//
+// Each sequence with a non-empty name is tested against the startPos, endPos,
+// maxAmbig, maxHomoP, minLength and maxLength members (a value of -1 disables
+// that test). Passing sequences are printed to goodFName; failing ones have
+// their name written to badAccnosFName and inserted into badSeqNames.
+//
+// On the Apple/Linux build the loop stops once the stream position is invalid
+// or reaches filePos->end; on other builds it runs to end-of-file.
+//
+// Returns the number of named sequences processed, or 0 if
+// m->control_pressed was observed. Any std::exception is reported through
+// m->errorOut and the process exits with status 1.
+int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+    try {
+        ofstream goodFile;
+        m->openOutputFile(goodFName, goodFile);
+
+        ofstream badAccnosFile;
+        m->openOutputFile(badAccnosFName, badAccnosFile);
+
+        ifstream inFASTA;
+        m->openInputFile(filename, inFASTA);
+
+        inFASTA.seekg(filePos->start);
+
+        int count = 0;
+
+        for (;;) {
+
+            if (m->control_pressed) { return 0; }
+
+            Sequence currSeq(inFASTA); m->gobble(inFASTA);
+
+            if (currSeq.getName() != "") {
+                // A sequence is rejected by the first enabled criterion it
+                // violates; later criteria are not evaluated.
+                const bool rejected =
+                       (startPos  != -1 && startPos  < currSeq.getStartPos())
+                    || (endPos    != -1 && endPos    > currSeq.getEndPos())
+                    || (maxAmbig  != -1 && maxAmbig  < currSeq.getAmbigBases())
+                    || (maxHomoP  != -1 && maxHomoP  < currSeq.getLongHomoPolymer())
+                    || (minLength != -1 && minLength > currSeq.getNumBases())
+                    || (maxLength != -1 && maxLength < currSeq.getNumBases());
+
+                if (rejected) {
+                    badAccnosFile << currSeq.getName() << endl;
+                    badSeqNames.insert(currSeq.getName());
+                }
+                else {
+                    currSeq.printSequence(goodFile);
+                }
+                count++;
+            }
+
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+                // stop at the end of this worker's slice of the file
+                unsigned long int offset = inFASTA.tellg();
+                if ((offset == -1) || (offset >= filePos->end)) { break; }
+            #else
+                if (inFASTA.eof()) { break; }
+            #endif
+
+            // progress message every 100 sequences
+            if ((count) % 100 == 0) {
+                m->mothurOut("Processing sequence: " + toString(count));
+                m->mothurOutEndLine();
+            }
+        }
+
+        // final progress message when the total is not a multiple of 100
+        if ((count) % 100 != 0) {
+            m->mothurOut("Processing sequence: " + toString(count));
+            m->mothurOutEndLine();
+        }
+
+        goodFile.close();
+        inFASTA.close();
+        badAccnosFile.close();
+
+        return count;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "driver");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+// MPI counterpart of driver(): screens `num` sequences starting at index
+// `start` of MPIPos.
+//
+// Record i is read from inMPI at offset MPIPos[start+i] with length
+// MPIPos[start+i+1] - MPIPos[start+i], parsed as a Sequence, and tested
+// against the startPos, endPos, maxAmbig, maxHomoP, minLength and maxLength
+// members (a value of -1 disables that test). Passing sequences are written
+// to goodFile as ">name\naligned\n"; failing ones have their name written to
+// badAccnosFile and inserted into badSeqNames.
+//
+// Returns 1 on completion, or 0 if m->control_pressed was observed. Any
+// std::exception is reported through m->errorOut and the process exits with
+// status 1.
+int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long int>& MPIPos, set<string>& badSeqNames){
+    try {
+        string outputString = "";
+        MPI_Status statusGood;
+        MPI_Status statusBadAccnos;
+        MPI_Status status;
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+        for(int i=0;i<num;i++){
+
+            if (m->control_pressed) { return 0; }
+
+            //read next sequence
+            int length = MPIPos[start+i+1] - MPIPos[start+i];
+
+            // Zero-filled buffer with one extra byte: the record is always
+            // NUL-terminated, so building a string from it cannot run past the
+            // allocation. The vector also releases the memory on every exit
+            // path (the previous code paired new[] with scalar delete).
+            vector<char> readBuf(length + 1, '\0');
+
+            MPI_File_read_at(inMPI, MPIPos[start+i], &readBuf[0], length, MPI_CHAR, &status);
+
+            string tempBuf(&readBuf[0]);
+            istringstream iss (tempBuf,istringstream::in);
+
+            Sequence currSeq(iss);
+
+            //process seq
+            if (currSeq.getName() != "") {
+                bool goodSeq = 1;   // innocent until proven guilty
+                if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())          { goodSeq = 0; }
+                if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                { goodSeq = 0; }
+                if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())        { goodSeq = 0; }
+                if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   { goodSeq = 0; }
+                if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())        { goodSeq = 0; }
+                if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())        { goodSeq = 0; }
+
+                if(goodSeq == 1){
+                    outputString = ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
+
+                    //print good seq
+                    // copied into a mutable buffer because the MPI write call
+                    // may be declared with a non-const buffer parameter
+                    length = outputString.length();
+                    vector<char> goodBuf(outputString.begin(), outputString.end());
+
+                    MPI_File_write_shared(goodFile, &goodBuf[0], length, MPI_CHAR, &statusGood);
+                }
+                else{
+
+                    badSeqNames.insert(currSeq.getName());
+
+                    //write to bad accnos file
+                    outputString = currSeq.getName() + "\n";
+
+                    length = outputString.length();
+                    vector<char> badBuf(outputString.begin(), outputString.end());
+
+                    MPI_File_write_shared(badAccnosFile, &badBuf[0], length, MPI_CHAR, &statusBadAccnos);
+                }
+            }
+
+            //report progress
+            if((i) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i)); m->mothurOutEndLine(); }
+        }
+
+        return 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
+        exit(1);
+    }
+}
+#endif
+/**************************************************************************************************/
+
+int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
+ try {
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ int process = 0;
+ int num = 0;
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = filename + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processors;i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = filename + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); remove(tempFile.c_str());
+ }
+
+ return num;
+#endif
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
+ exit(1);