+
+ if (m->control_pressed) { m->mothurRemove(goodGroupFile); }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
+ exit(1);
+ }
+}
+//***************************************************************************************************************
+// Copies countfile to a new "count" output file, leaving out every sequence
+// whose name is listed in badSeqNames.
+//
+// badSeqNames is received by value: names are erased from this local copy as
+// they are matched, so anything still in the set after the pass was never seen
+// in the count file and is reported to the user.
+//
+// Always returns 0. When m->control_pressed is set, the partially written
+// output file is removed before returning.
+int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
+    try {
+        ifstream countIn;
+        m->openInputFile(countfile, countIn);
+
+        string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
+        outputNames.push_back(goodCountFile);
+        outputTypes["count"].push_back(goodCountFile);
+
+        ofstream goodCountOut;
+        m->openOutputFile(goodCountFile, goodCountOut);
+
+        //the first line is the header and is passed through untouched
+        string headerLine = m->getline(countIn); m->gobble(countIn);
+        goodCountOut << headerLine << endl;
+
+        string seqName, remainder;
+        int total;
+        while (!countIn.eof()) {
+
+            if (m->control_pressed) { goodCountOut.close(); countIn.close(); m->mothurRemove(goodCountFile); return 0; }
+
+            //each row is: name, total, then the rest of the line
+            countIn >> seqName; m->gobble(countIn);
+            countIn >> total; m->gobble(countIn);
+            remainder = m->getline(countIn); m->gobble(countIn);
+
+            set<string>::iterator found = badSeqNames.find(seqName);
+
+            if (found == badSeqNames.end()) {
+                goodCountOut << seqName << '\t' << total << '\t' << remainder << endl;
+            }
+            else {
+                //screened out: drop the row and tick the name off the list
+                badSeqNames.erase(found);
+            }
+        }
+
+        if (m->control_pressed) { goodCountOut.close(); countIn.close(); m->mothurRemove(goodCountFile); return 0; }
+
+        //names still in the set were never found in the count file
+        for (set<string>::iterator missing = badSeqNames.begin(); missing != badSeqNames.end(); ++missing) {
+            m->mothurOut("Your count file does not include the sequence " + *missing + " please correct.");
+            m->mothurOutEndLine();
+        }
+
+        countIn.close();
+        goodCountOut.close();
+
+        //check for groups that have been eliminated: when testGroups() says so,
+        //the table is read back in and printed over the same file
+        CountTable ct;
+        if (ct.testGroups(goodCountFile)) {
+            ct.readTable(goodCountFile);
+            ct.printTable(goodCountFile);
+        }
+
+        if (m->control_pressed) { m->mothurRemove(goodCountFile); }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
+        exit(1);
+    }
+}
+//***************************************************************************************************************
+
+// Copies the alignreport file to a new "alignreport" output file, leaving out
+// the row of every sequence named in badSeqNames. The header line is copied
+// through unchanged.
+//
+// badSeqNames is received by value: names are erased from this local copy as
+// they are matched, so anything left afterwards was never seen in the report
+// and is reported to the user.
+//
+// Always returns 0. When m->control_pressed is set, the partially written
+// output file is removed before returning.
+int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
+    try {
+        ifstream inputAlignReport;
+        m->openInputFile(alignreport, inputAlignReport);
+        string seqName;
+        set<string>::iterator it;
+
+        string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + getOutputFileNameTag("alignreport");
+        outputNames.push_back(goodAlignReportFile); outputTypes["alignreport"].push_back(goodAlignReportFile);
+        ofstream goodAlignReportOut; m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
+
+        // get(char&) only hands back a character when one was actually read, so
+        // the end-of-file sentinel is never mistaken for data and written out.
+        char c;
+
+        //copy the header line, up to and including its first line terminator
+        while (inputAlignReport.get(c)) {
+            goodAlignReportOut << c;
+            if (c == 10 || c == 13){ break; }
+        }
+
+        while(!inputAlignReport.eof()){
+            if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); m->mothurRemove(goodAlignReportFile); return 0; }
+
+            inputAlignReport >> seqName;
+            it = badSeqNames.find(seqName);
+
+            //collect the remainder of this row, including its line terminator
+            string line;
+            while (inputAlignReport.get(c)) {
+                line += c;
+                if (c == 10 || c == 13){ break; }
+            }
+
+            if(it != badSeqNames.end()){
+                badSeqNames.erase(it);
+            }
+            else{
+                // NOTE(review): operator>> leaves the delimiter after the name in
+                // the stream, so `line` begins with it and the row is written
+                // with that delimiter in addition to the '\t' below. Kept as-is
+                // to avoid changing the output format; confirm whether intended.
+                goodAlignReportOut << seqName << '\t' << line;
+            }
+            m->gobble(inputAlignReport);
+        }
+
+        if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); m->mothurRemove(goodAlignReportFile); return 0; }
+
+        //we were unable to remove some of the bad sequences
+        if (badSeqNames.size() != 0) {
+            for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct.");
+                m->mothurOutEndLine();
+            }
+        }
+
+        inputAlignReport.close();
+        goodAlignReportOut.close();
+
+        if (m->control_pressed) { m->mothurRemove(goodAlignReportFile); return 0; }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
+        exit(1);
+    }
+
+}
+//***************************************************************************************************************
+
+// Copies the taxonomy file to a new "taxonomy" output file, leaving out every
+// sequence named in badSeqNames. Each record is read as two
+// whitespace-separated tokens (name, taxonomy) and written back tab-separated.
+//
+// badSeqNames is received by value: names are erased from this local copy as
+// they are matched, so anything left afterwards was never seen in the taxonomy
+// file and is reported to the user.
+//
+// Always returns 0. When m->control_pressed is set, the partially written
+// output file is removed before returning.
+int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
+    try {
+        ifstream taxIn;
+        m->openInputFile(taxonomy, taxIn);
+
+        string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + getOutputFileNameTag("taxonomy", taxonomy);
+        outputNames.push_back(goodTaxFile);
+        outputTypes["taxonomy"].push_back(goodTaxFile);
+
+        ofstream goodTaxOut;
+        m->openOutputFile(goodTaxFile, goodTaxOut);
+
+        string seqName, tax;
+        while (!taxIn.eof()) {
+            if (m->control_pressed) { goodTaxOut.close(); taxIn.close(); m->mothurRemove(goodTaxFile); return 0; }
+
+            taxIn >> seqName >> tax;
+
+            set<string>::iterator found = badSeqNames.find(seqName);
+            if (found == badSeqNames.end()) {
+                goodTaxOut << seqName << '\t' << tax << endl;
+            }
+            else {
+                //screened out: drop the record and tick the name off the list
+                badSeqNames.erase(found);
+            }
+
+            m->gobble(taxIn);
+        }
+
+        if (m->control_pressed) { goodTaxOut.close(); taxIn.close(); m->mothurRemove(goodTaxFile); return 0; }
+
+        //names still in the set were never found in the taxonomy file
+        for (set<string>::iterator missing = badSeqNames.begin(); missing != badSeqNames.end(); ++missing) {
+            m->mothurOut("Your taxonomy file does not include the sequence " + *missing + " please correct.");
+            m->mothurOutEndLine();
+        }
+
+        taxIn.close();
+        goodTaxOut.close();
+
+        if (m->control_pressed) { m->mothurRemove(goodTaxFile); }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenTaxonomy");
+        exit(1);
+    }
+
+}
+//***************************************************************************************************************
+
+// Copies the quality file to a new "qfile" output file, leaving out every
+// record whose sequence name is listed in badSeqNames.
+//
+// A record is a header line followed by everything up to the next '>'. The
+// name looked up in badSeqNames is the first whitespace-delimited token of the
+// header with its first character removed.
+//
+// badSeqNames is received by value: names are erased from this local copy as
+// they are matched, so anything left afterwards was never seen in the qual
+// file and is reported to the user.
+//
+// Always returns 0. When m->control_pressed is set, the partially written
+// output file is removed before returning.
+int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
+    try {
+        ifstream in;
+        m->openInputFile(qualfile, in);
+        set<string>::iterator it;
+
+        string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
+        outputNames.push_back(goodQualFile); outputTypes["qfile"].push_back(goodQualFile);
+        ofstream goodQual; m->openOutputFile(goodQualFile, goodQual);
+
+        // get(char&) only hands back a character when one was actually read, so
+        // the end-of-file sentinel is never appended to the name or the scores
+        // of the last record.
+        char c;
+
+        while(!in.eof()){
+
+            if (m->control_pressed) { goodQual.close(); in.close(); m->mothurRemove(goodQualFile); return 0; }
+
+            string saveName = "";
+            string name = "";
+            string scores = "";
+
+            in >> name;
+
+            if (name.length() != 0) {
+                saveName = name.substr(1); //lookup key: header token without its first character
+
+                //keep the rest of the header line so it is written back in full
+                while (in.get(c)) {
+                    if (c == 10 || c == 13){ break; }
+                    name += c;
+                }
+                m->gobble(in);
+            }
+
+            //everything up to the next record's '>' belongs to this record
+            while (in.get(c)) {
+                if (c == '>') { in.putback(c); break; }
+                scores += c;
+            }
+
+            m->gobble(in);
+
+            it = badSeqNames.find(saveName);
+
+            if(it != badSeqNames.end()){
+                badSeqNames.erase(it);
+            }else{
+                goodQual << name << endl << scores;
+            }
+
+            m->gobble(in);
+        }
+
+        in.close();
+        goodQual.close();
+
+        //we were unable to remove some of the bad sequences
+        if (badSeqNames.size() != 0) {
+            for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
+                m->mothurOut("Your qual file does not include the sequence " + *it + " please correct.");
+                m->mothurOutEndLine();
+            }
+        }
+
+        if (m->control_pressed) { m->mothurRemove(goodQualFile); return 0; }
+
+        return 0;
+
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "screenQual");
+        exit(1);
+    }
+
+}
+//**********************************************************************************************************************
+
+// Screens the sequences of `filename` that lie in the byte range described by
+// filePos. Sequences that pass every enabled filter are printed to goodFName;
+// the names of the others are written to badAccnosFName and inserted into
+// badSeqNames.
+//
+// A filter whose threshold member is -1 is disabled. Once one filter rejects a
+// sequence the remaining ones are not evaluated.
+//
+// Returns the number of named sequences processed, or 0 if m->control_pressed
+// was set while scanning.
+int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
+    try {
+        ofstream goodFile;
+        m->openOutputFile(goodFName, goodFile);
+
+        ofstream badAccnosFile;
+        m->openOutputFile(badAccnosFName, badAccnosFile);
+
+        ifstream inFASTA;
+        m->openInputFile(filename, inFASTA);
+
+        inFASTA.seekg(filePos.start);
+
+        int numProcessed = 0;
+
+        //the loop is only left through the end-of-range test or an interrupt
+        for (;;) {
+
+            if (m->control_pressed) { return 0; }
+
+            Sequence currSeq(inFASTA); m->gobble(inFASTA);
+            if (currSeq.getName() != "") {
+                // innocent until proven guilty: the first failing filter
+                // short-circuits the rest
+                const bool rejected =
+                       (startPos != -1 && currSeq.getStartPos() > startPos)
+                    || (endPos != -1 && currSeq.getEndPos() < endPos)
+                    || (maxAmbig != -1 && currSeq.getAmbigBases() > maxAmbig)
+                    || (maxHomoP != -1 && currSeq.getLongHomoPolymer() > maxHomoP)
+                    || (minLength != -1 && currSeq.getNumBases() < minLength)
+                    || (maxLength != -1 && currSeq.getNumBases() > maxLength);
+
+                if (rejected) {
+                    badAccnosFile << currSeq.getName() << endl;
+                    badSeqNames.insert(currSeq.getName());
+                }
+                else {
+                    currSeq.printSequence(goodFile);
+                }
+                numProcessed++;
+            }
+
+            #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+                //stop once the read position fails or reaches the end of our range
+                unsigned long long here = inFASTA.tellg();
+                if ((here == -1) || (here >= filePos.end)) { break; }
+            #else
+                if (inFASTA.eof()) { break; }
+            #endif
+
+            //report progress
+            if (numProcessed % 100 == 0) { m->mothurOut("Processing sequence: " + toString(numProcessed)); m->mothurOutEndLine(); }
+        }
+
+        //final progress line, unless the loop already printed this exact count
+        if (numProcessed % 100 != 0) { m->mothurOut("Processing sequence: " + toString(numProcessed)); m->mothurOutEndLine(); }
+
+        goodFile.close();
+        inFASTA.close();
+        badAccnosFile.close();
+
+        return numProcessed;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "driver");
+        exit(1);
+    }
+}
+//**********************************************************************************************************************
+#ifdef USE_MPI
+// MPI variant of driver(): screens `num` sequences starting at index `start`.
+//
+// Sequence i is read from inMPI as the bytes between MPIPos[start+i] and
+// MPIPos[start+i+1]. Sequences that pass every enabled filter are written to
+// goodFile as ">name\naligned\n"; the names of the others are written to
+// badAccnosFile and inserted into badSeqNames. A filter whose threshold member
+// is -1 is disabled.
+//
+// Returns 1 when all `num` sequences were processed, or 0 if
+// m->control_pressed was set part-way through.
+int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long long>& MPIPos, set<string>& badSeqNames){
+    try {
+        string outputString = "";
+        MPI_Status statusGood;
+        MPI_Status statusBadAccnos;
+        MPI_Status status;
+        int pid;
+        MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+
+        for(int i=0;i<num;i++){
+
+            if (m->control_pressed) { return 0; }
+
+            //read next sequence
+            int length = MPIPos[start+i+1] - MPIPos[start+i];
+
+            // The buffer is zero-filled and one byte longer than the read, so it
+            // is always NUL-terminated when converted to a string below, and it
+            // is released automatically on every exit path.
+            vector<char> readBuf(length + 1, '\0');
+
+            MPI_File_read_at(inMPI, MPIPos[start+i], &readBuf[0], length, MPI_CHAR, &status);
+
+            string tempBuf = &readBuf[0];
+            istringstream iss (tempBuf,istringstream::in);
+
+            Sequence currSeq(iss);
+
+            //process seq
+            if (currSeq.getName() != "") {
+                bool goodSeq = 1; // innocent until proven guilty
+                if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos()) { goodSeq = 0; }
+                if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos()) { goodSeq = 0; }
+                if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases()) { goodSeq = 0; }
+                if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer()) { goodSeq = 0; }
+                if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases()) { goodSeq = 0; }
+                if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases()) { goodSeq = 0; }
+
+                if(goodSeq == 1){
+                    outputString = ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
+
+                    //print good seq
+                    // copied into a mutable buffer for the MPI write call;
+                    // outputString is never empty here, so &goodBuf[0] is valid
+                    length = outputString.length();
+                    vector<char> goodBuf(outputString.begin(), outputString.end());
+
+                    MPI_File_write_shared(goodFile, &goodBuf[0], length, MPI_CHAR, &statusGood);
+                }
+                else{
+
+                    badSeqNames.insert(currSeq.getName());
+
+                    //write to bad accnos file
+                    outputString = currSeq.getName() + "\n";
+
+                    length = outputString.length();
+                    vector<char> badBuf(outputString.begin(), outputString.end());
+
+                    MPI_File_write_shared(badAccnosFile, &badBuf[0], length, MPI_CHAR, &statusBadAccnos);
+                }
+            }
+
+            //report progress
+            if((i) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(i)); m->mothurOutEndLine(); }
+        }
+
+        return 1;
+    }
+    catch(exception& e) {
+        m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
+        exit(1);
+    }
+}
+#endif
+/**************************************************************************************************/
+
+int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
+ try {
+
+ vector<int> processIDS;
+ int process = 1;
+ int num = 0;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
+
+ //pass numSeqs to parent
+ ofstream out;
+ string tempFile = filename + toString(getpid()) + ".num.temp";
+ m->openOutputFile(tempFile, out);
+ out << num << endl;
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ num = driver(lines[0], goodFileName, badAccnos, filename, badSeqNames);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = filename + toString(processIDS[i]) + ".num.temp";
+ m->openInputFile(tempFile, in);
+ if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
+ in.close(); m->mothurRemove(tempFile);
+
+ m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+ m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
+ m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
+ }
+
+ //read badSeqs in because root process doesnt know what other "bad" seqs the children found
+ ifstream inBad;
+ int ableToOpen = m->openInputFile(badAccnos, inBad, "no error");
+
+ if (ableToOpen == 0) {
+ badSeqNames.clear();
+ string tempName;
+ while (!inBad.eof()) {
+ inBad >> tempName; m->gobble(inBad);
+ badSeqNames.insert(tempName);
+ }
+ inBad.close();
+ }
+#else
+
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+ //Windows version shared memory, so be careful when passing variables through the sumScreenData struct.
+ //Above fork() will clone, so memory is separate, but that's not the case with windows,
+ //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
+ //////////////////////////////////////////////////////////////////////////////////////////////////////
+
+ vector<sumScreenData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=0; i<processors-1; i++ ){
+
+ string extension = "";
+ if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
+
+ // Allocate memory for thread data.
+ sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
+ pDataArray.push_back(tempSum);
+
+ //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
+ hThreadArray[i] = CreateThread(NULL, 0, MySumScreenThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);
+ }
+
+ //do your part
+ num = driver(lines[processors-1], (goodFileName+toString(processors-1)+".temp"), (badAccnos+toString(processors-1)+".temp"), filename, badSeqNames);
+ processIDS.push_back(processors-1);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ num += pDataArray[i]->count;
+ for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it); }
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ for (int i = 0; i < processIDS.size(); i++) {
+ m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
+ m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
+
+ m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
+ m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
+ }
+
+#endif
+
+ return num;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
+ exit(1);