]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
1.19.0
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "sequence.hpp"
12
13 //**********************************************************************************************************************
14 vector<string> ScreenSeqsCommand::setParameters(){      
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
18                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
19                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
20                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
21                 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
22                 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
23                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
24                 CommandParameter pmaxhomop("maxhomop", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxhomop);
25                 CommandParameter pminlength("minlength", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pminlength);
26                 CommandParameter pmaxlength("maxlength", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxlength);
27                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
28                 CommandParameter pcriteria("criteria", "Number", "", "90", "", "", "",false,false); parameters.push_back(pcriteria);
29                 CommandParameter poptimize("optimize", "Multiple", "none-start-end-maxambig-maxhomop-minlength-maxlength", "none", "", "", "",true,false); parameters.push_back(poptimize);
30                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
31                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
32                 
33                 vector<string> myArray;
34                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
35                 return myArray;
36         }
37         catch(exception& e) {
38                 m->errorOut(e, "ScreenSeqsCommand", "setParameters");
39                 exit(1);
40         }
41 }
42 //**********************************************************************************************************************
43 string ScreenSeqsCommand::getHelpString(){      
44         try {
45                 string helpString = "";
46                 helpString += "The screen.seqs command reads a fastafile and creates .....\n";
47                 helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, qfile, optimize, criteria and processors.\n";
48                 helpString += "The fasta parameter is required.\n";
49                 helpString += "The start parameter .... The default is -1.\n";
50                 helpString += "The end parameter .... The default is -1.\n";
51                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
52                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
53                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
54                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
55                 helpString += "The processors parameter allows you to specify the number of processors to use while running the command. The default is 1.\n";
56                 helpString += "The optimize and criteria parameters allow you set the start, end, maxabig, maxhomop, minlength and maxlength parameters relative to your set of sequences .\n";
57                 helpString += "For example optimize=start-end, criteria=90, would set the start and end values to the position 90% of your sequences started and ended.\n";
58                 helpString += "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n";
59                 helpString += "The screen.seqs command should be in the following format: \n";
60                 helpString += "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig,  \n";
61                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
62                 helpString += "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
63                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
64                 return helpString;
65         }
66         catch(exception& e) {
67                 m->errorOut(e, "ScreenSeqsCommand", "getHelpString");
68                 exit(1);
69         }
70 }
71 //**********************************************************************************************************************
72 ScreenSeqsCommand::ScreenSeqsCommand(){ 
73         try {
74                 abort = true; calledHelp = true; 
75                 setParameters();
76                 vector<string> tempOutNames;
77                 outputTypes["fasta"] = tempOutNames;
78                 outputTypes["name"] = tempOutNames;
79                 outputTypes["group"] = tempOutNames;
80                 outputTypes["alignreport"] = tempOutNames;
81                 outputTypes["accnos"] = tempOutNames;
82                 outputTypes["qfile"] = tempOutNames;
83         }
84         catch(exception& e) {
85                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
86                 exit(1);
87         }
88 }
89 //***************************************************************************************************************
90
91 ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
92         try {
93                 abort = false; calledHelp = false;   
94                 
95                 //allow user to run help
96                 if(option == "help") { help(); abort = true; calledHelp = true; }
97                 
98                 else {
99                         vector<string> myArray = setParameters();
100                         
101                         OptionParser parser(option);
102                         map<string,string> parameters = parser.getParameters();
103                         
104                         ValidParameters validParameter("screen.seqs");
105                         map<string,string>::iterator it;
106                         
107                         //check to make sure all parameters are valid for command
108                         for (it = parameters.begin(); it != parameters.end(); it++) { 
109                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
110                         }
111                         
112                         //initialize outputTypes
113                         vector<string> tempOutNames;
114                         outputTypes["fasta"] = tempOutNames;
115                         outputTypes["name"] = tempOutNames;
116                         outputTypes["group"] = tempOutNames;
117                         outputTypes["alignreport"] = tempOutNames;
118                         outputTypes["accnos"] = tempOutNames;
119                         outputTypes["qfile"] = tempOutNames;
120                         
121                         //if the user changes the input directory command factory will send this info to us in the output parameter 
122                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
123                         if (inputDir == "not found"){   inputDir = "";          }
124                         else {
125                                 string path;
126                                 it = parameters.find("fasta");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
132                                 }
133                                 
134                                 it = parameters.find("group");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
140                                 }
141                                 
142                                 it = parameters.find("name");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
148                                 }
149                                 
150                                 it = parameters.find("alignreport");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
156                                 }
157                                 
158                                 it = parameters.find("qfile");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
164                                 }
165                                 
166                         }
167
168                         //check for required parameters
169                         fastafile = validParameter.validFile(parameters, "fasta", true);
170                         if (fastafile == "not found") {                         
171                                 fastafile = m->getFastaFile(); 
172                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
173                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
174                         }
175                         else if (fastafile == "not open") { abort = true; }     
176         
177                         groupfile = validParameter.validFile(parameters, "group", true);
178                         if (groupfile == "not open") { abort = true; }  
179                         else if (groupfile == "not found") { groupfile = ""; }
180                         
181                         qualfile = validParameter.validFile(parameters, "qfile", true);
182                         if (qualfile == "not open") { abort = true; }   
183                         else if (qualfile == "not found") { qualfile = ""; }
184                         
185                         namefile = validParameter.validFile(parameters, "name", true);
186                         if (namefile == "not open") { namefile = ""; abort = true; }
187                         else if (namefile == "not found") { namefile = ""; }    
188
189                         alignreport = validParameter.validFile(parameters, "alignreport", true);
190                         if (alignreport == "not open") { abort = true; }
191                         else if (alignreport == "not found") { alignreport = ""; }      
192                         
193                         //if the user changes the output directory command factory will send this info to us in the output parameter 
194                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
195                                 outputDir = ""; 
196                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
197                         }
198
199                         //check for optional parameter and set defaults
200                         // ...at some point should added some additional type checking...
201                         string temp;
202                         temp = validParameter.validFile(parameters, "start", false);            if (temp == "not found") { temp = "-1"; }
203                         convert(temp, startPos); 
204                 
205                         temp = validParameter.validFile(parameters, "end", false);                      if (temp == "not found") { temp = "-1"; }
206                         convert(temp, endPos);  
207
208                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
209                         convert(temp, maxAmbig);  
210
211                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "-1"; }
212                         convert(temp, maxHomoP);  
213
214                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "-1"; }
215                         convert(temp, minLength); 
216                         
217                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "-1"; }
218                         convert(temp, maxLength); 
219                         
220                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
221                         m->setProcessors(temp);
222                         convert(temp, processors);
223                         
224                         temp = validParameter.validFile(parameters, "optimize", false); //optimizing trumps the optimized values original value
225                         if (temp == "not found"){       temp = "none";          }
226                         m->splitAtDash(temp, optimize);         
227                         
228                         //check for invalid optimize options
229                         set<string> validOptimizers;
230                         validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength");
231                         for (int i = 0; i < optimize.size(); i++) { 
232                                 if (validOptimizers.count(optimize[i]) == 0) { 
233                                         m->mothurOut(optimize[i] + " is not a valid optimizer. Valid options are start, end, maxambig, maxhomop, minlength and maxlength."); m->mothurOutEndLine();
234                                         optimize.erase(optimize.begin()+i);
235                                         i--;
236                                 }
237                         }
238                         
239                         if (optimize.size() == 1) { if (optimize[0] == "none") { optimize.clear(); } }
240                         
241                         temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
242                         convert(temp, criteria); 
243                 }
244
245         }
246         catch(exception& e) {
247                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
248                 exit(1);
249         }
250 }
251 //***************************************************************************************************************
252
253 int ScreenSeqsCommand::execute(){
254         try{
255                 
256                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
257                 
258                 //if the user want to optimize we need to know the 90% mark
259                 vector<unsigned long int> positions;
260                 if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
261                         //use the namefile to optimize correctly
262                         if (namefile != "") { nameMap = m->readNames(namefile); }
263                         getSummary(positions); 
264                 } 
265                 else { 
266                         positions = m->divideFile(fastafile, processors);
267                         for (int i = 0; i < (positions.size()-1); i++) {
268                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
269                         }       
270                 }
271                                 
272                 string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + "good" + m->getExtension(fastafile);
273                 string badAccnosFile =  outputDir + m->getRootName(m->getSimpleName(fastafile)) + "bad.accnos";
274                 
275                 int numFastaSeqs = 0;
276                 set<string> badSeqNames;
277                 int start = time(NULL);
278                 
279 #ifdef USE_MPI  
280                         int pid, numSeqsPerProcessor; 
281                         int tag = 2001;
282                         vector<unsigned long int> MPIPos;
283                         
284                         MPI_Status status; 
285                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
286                         MPI_Comm_size(MPI_COMM_WORLD, &processors); 
287
288                         MPI_File inMPI;
289                         MPI_File outMPIGood;
290                         MPI_File outMPIBadAccnos;
291                         
292                         int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
293                         int inMode=MPI_MODE_RDONLY; 
294                         
295                         char outGoodFilename[1024];
296                         strcpy(outGoodFilename, goodSeqFile.c_str());
297
298                         char outBadAccnosFilename[1024];
299                         strcpy(outBadAccnosFilename, badAccnosFile.c_str());
300
301                         char inFileName[1024];
302                         strcpy(inFileName, fastafile.c_str());
303                         
304                         MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
305                         MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
306                         MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
307                         
308                         if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood); MPI_File_close(&outMPIBadAccnos); return 0; }
309                         
310                         if (pid == 0) { //you are the root process 
311                                 
312                                 MPIPos = m->setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
313                                 
314                                 //send file positions to all processes
315                                 for(int i = 1; i < processors; i++) { 
316                                         MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
317                                         MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
318                                 }
319                                 
320                                 //figure out how many sequences you have to align
321                                 numSeqsPerProcessor = numFastaSeqs / processors;
322                                 int startIndex =  pid * numSeqsPerProcessor;
323                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
324                 //      cout << pid << '\t' << numSeqsPerProcessor << '\t' <<   startIndex << endl;
325                                 //align your part
326                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
327                 //cout << pid << " done" << endl;
328                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
329
330                                 for (int i = 1; i < processors; i++) {
331                                 
332                                         //get bad lists
333                                         int badSize;
334                                         MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
335                                         /*for (int j = 0; j < badSize; j++) {
336                                                 int length;
337                                                 MPI_Recv(&length, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);  //recv the length of the name
338                                                 char* buf2 = new char[length];                                                                          //make space to recieve it
339                                                 MPI_Recv(buf2, length, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status);  //get name
340                                                 
341                                                 string tempBuf = buf2;
342                                                 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
343                                                 delete buf2;
344                                                 
345                                                 badSeqNames.insert(tempBuf);
346                                         }*/
347                                 }
348                         }else{ //you are a child process
349                                 MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
350                                 MPIPos.resize(numFastaSeqs+1);
351                                 MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
352
353                                 //figure out how many sequences you have to align
354                                 numSeqsPerProcessor = numFastaSeqs / processors;
355                                 int startIndex =  pid * numSeqsPerProcessor;
356                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
357                 //cout << pid << '\t' << numSeqsPerProcessor << '\t' <<         startIndex << endl;             
358                                 //align your part
359                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
360 //cout << pid << " done" << endl;
361                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
362                                 
363                                 //send bad list 
364                                 int badSize = badSeqNames.size();
365                                 MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
366                                 
367                                 /*
368                                 set<string>::iterator it;
369                                 for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
370                                         string name = *it;
371                                         int length = name.length();
372                                         char* buf2 = new char[length];
373                                         memcpy(buf2, name.c_str(), length);
374                                         
375                                         MPI_Send(&length, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
376                                         MPI_Send(buf2, length, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
377                                 }*/
378                         }
379                         
380                         //close files 
381                         MPI_File_close(&inMPI);
382                         MPI_File_close(&outMPIGood);
383                         MPI_File_close(&outMPIBadAccnos);
384                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
385                                         
386 #else
387                                                 
388         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
389                         if(processors == 1){
390                                 numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);
391                                 
392                                 if (m->control_pressed) { remove(goodSeqFile.c_str()); return 0; }
393                                 
394                         }else{
395                                 processIDS.resize(0);
396                                 
397                                 numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); 
398                                 
399                                 rename((goodSeqFile + toString(processIDS[0]) + ".temp").c_str(), goodSeqFile.c_str());
400                                 rename((badAccnosFile + toString(processIDS[0]) + ".temp").c_str(), badAccnosFile.c_str());
401                                 
402                                 //append alignment and report files
403                                 for(int i=1;i<processors;i++){
404                                         m->appendFiles((goodSeqFile + toString(processIDS[i]) + ".temp"), goodSeqFile);
405                                         remove((goodSeqFile + toString(processIDS[i]) + ".temp").c_str());
406                         
407                                         m->appendFiles((badAccnosFile + toString(processIDS[i]) + ".temp"), badAccnosFile);
408                                         remove((badAccnosFile + toString(processIDS[i]) + ".temp").c_str());
409                                 }
410                                 
411                                 if (m->control_pressed) { remove(goodSeqFile.c_str()); return 0; }
412                                 
413                                 //read badSeqs in because root process doesnt know what other "bad" seqs the children found
414                                 ifstream inBad;
415                                 int ableToOpen = m->openInputFile(badAccnosFile, inBad, "no error");
416                                 
417                                 if (ableToOpen == 0) {
418                                         badSeqNames.clear();
419                                         string tempName;
420                                         while (!inBad.eof()) {
421                                                 inBad >> tempName; m->gobble(inBad);
422                                                 badSeqNames.insert(tempName);
423                                         }
424                                         inBad.close();
425                                 }
426                         }
427         #else
428                         numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);
429                         
430                         if (m->control_pressed) { remove(goodSeqFile.c_str()); return 0; }
431                         
432         #endif
433
434 #endif          
435
436                 #ifdef USE_MPI
437                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
438                                         
439                         if (pid == 0) { //only one process should fix files
440                         
441                                 //read accnos file with all names in it, process 0 just has its names
442                                 MPI_File inMPIAccnos;
443                                 MPI_Offset size;
444                         
445                                 char inFileName[1024];
446                                 strcpy(inFileName, badAccnosFile.c_str());
447                         
448                                 MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
449                                 MPI_File_get_size(inMPIAccnos, &size);
450                         
451                                 char* buffer = new char[size];
452                                 MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
453                         
454                                 string tempBuf = buffer;
455                                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
456                                 istringstream iss (tempBuf,istringstream::in);
457
458                                 delete buffer;
459                                 MPI_File_close(&inMPIAccnos);
460                                 
461                                 badSeqNames.clear();
462                                 string tempName;
463                                 while (!iss.eof()) {
464                                         iss >> tempName; m->gobble(iss);
465                                         badSeqNames.insert(tempName);
466                                 }
467                 #endif
468                                                                                                                                                                         
469                 if(namefile != "" && groupfile != "")   {       
470                         screenNameGroupFile(badSeqNames);       
471                         if (m->control_pressed) {  remove(goodSeqFile.c_str()); return 0; }
472                 }else if(namefile != "")        {       
473                         screenNameGroupFile(badSeqNames);
474                         if (m->control_pressed) {  remove(goodSeqFile.c_str());  return 0; }    
475                 }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
476                 
477                 if (m->control_pressed) { remove(goodSeqFile.c_str());  return 0; }
478
479                 if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
480                 if(qualfile != "")                                              {       screenQual(badSeqNames);                        }
481                 
482                 if (m->control_pressed) { remove(goodSeqFile.c_str());  return 0; }
483                 
484                 #ifdef USE_MPI
485                         }
486                 #endif
487
488                 m->mothurOutEndLine();
489                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
490                 m->mothurOut(goodSeqFile); m->mothurOutEndLine();       outputTypes["fasta"].push_back(goodSeqFile);
491                 m->mothurOut(badAccnosFile); m->mothurOutEndLine();      outputTypes["accnos"].push_back(badAccnosFile);
492                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
493                 m->mothurOutEndLine();
494                 m->mothurOutEndLine();
495                 
496                 //set fasta file as new current fastafile
497                 string current = "";
498                 itTypes = outputTypes.find("fasta");
499                 if (itTypes != outputTypes.end()) {
500                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
501                 }
502                 
503                 itTypes = outputTypes.find("name");
504                 if (itTypes != outputTypes.end()) {
505                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
506                 }
507                 
508                 itTypes = outputTypes.find("group");
509                 if (itTypes != outputTypes.end()) {
510                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
511                 }
512                 
513                 itTypes = outputTypes.find("qfile");
514                 if (itTypes != outputTypes.end()) {
515                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
516                 }
517
518                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
519                 m->mothurOutEndLine();
520
521                 return 0;
522         }
523         catch(exception& e) {
524                 m->errorOut(e, "ScreenSeqsCommand", "execute");
525                 exit(1);
526         }
527 }
528
529 //***************************************************************************************************************
530
531 int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
532         try {
533                 ifstream inputNames;
534                 m->openInputFile(namefile, inputNames);
535                 set<string> badSeqGroups;
536                 string seqName, seqList, group;
537                 set<string>::iterator it;
538
539                 string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + "good" + m->getExtension(namefile);
540                 outputNames.push_back(goodNameFile);  outputTypes["name"].push_back(goodNameFile);
541                 
542                 ofstream goodNameOut;   m->openOutputFile(goodNameFile, goodNameOut);
543                 
544                 while(!inputNames.eof()){
545                         if (m->control_pressed) { goodNameOut.close();  inputNames.close(); remove(goodNameFile.c_str());  return 0; }
546
547                         inputNames >> seqName >> seqList;
548                         it = badSeqNames.find(seqName);
549                                 
550                         if(it != badSeqNames.end()){
551                                 badSeqNames.erase(it);
552                                 
553                                 if(namefile != ""){
554                                         int start = 0;
555                                         for(int i=0;i<seqList.length();i++){
556                                                 if(seqList[i] == ','){
557                                                         badSeqGroups.insert(seqList.substr(start,i-start));
558                                                         start = i+1;
559                                                 }                                       
560                                         }
561                                         badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
562                                 }
563                         }
564                         else{
565                                 goodNameOut << seqName << '\t' << seqList << endl;
566                         }
567                         m->gobble(inputNames);
568                 }
569                 inputNames.close();
570                 goodNameOut.close();
571         
572                 //we were unable to remove some of the bad sequences
573                 if (badSeqNames.size() != 0) {
574                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
575                                 m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
576                                 m->mothurOutEndLine();
577                         }
578                 }
579
580                 if(groupfile != ""){
581                         
582                         ifstream inputGroups;
583                         m->openInputFile(groupfile, inputGroups);
584
585                         string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile);
586                         outputNames.push_back(goodGroupFile);   outputTypes["group"].push_back(goodGroupFile);
587                         
588                         ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
589                         
590                         while(!inputGroups.eof()){
591                                 if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); remove(goodNameFile.c_str());  remove(goodGroupFile.c_str()); return 0; }
592
593                                 inputGroups >> seqName >> group;
594                                 
595                                 it = badSeqGroups.find(seqName);
596                                 
597                                 if(it != badSeqGroups.end()){
598                                         badSeqGroups.erase(it);
599                                 }
600                                 else{
601                                         goodGroupOut << seqName << '\t' << group << endl;
602                                 }
603                                 m->gobble(inputGroups);
604                         }
605                         inputGroups.close();
606                         goodGroupOut.close();
607                         
608                         //we were unable to remove some of the bad sequences
609                         if (badSeqGroups.size() != 0) {
610                                 for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
611                                         m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
612                                         m->mothurOutEndLine();
613                                 }
614                         }
615                 }
616                 
617                 
618                 return 0;
619         
620         }
621         catch(exception& e) {
622                 m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
623                 exit(1);
624         }
625 }
626 //***************************************************************************************************************
627 int ScreenSeqsCommand::getSummary(vector<unsigned long int>& positions){
628         try {
629                 
630                 vector<int> startPosition;
631                 vector<int> endPosition;
632                 vector<int> seqLength;
633                 vector<int> ambigBases;
634                 vector<int> longHomoPolymer;
635                 
636                 vector<unsigned long int> positions = m->divideFile(fastafile, processors);
637                                 
638                 for (int i = 0; i < (positions.size()-1); i++) {
639                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
640                 }       
641                 
642                 int numSeqs = 0;
643                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
644                         if(processors == 1){
645                                 numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
646                         }else{
647                                 numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile); 
648                         }
649                                 
650                         if (m->control_pressed) {  return 0; }
651                 #else
652                         numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
653                         if (m->control_pressed) {  return 0; }
654                 #endif
655
656                 sort(startPosition.begin(), startPosition.end());
657                 sort(endPosition.begin(), endPosition.end());
658                 sort(seqLength.begin(), seqLength.end());
659                 sort(ambigBases.begin(), ambigBases.end());
660                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
661                 
662                 //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
663                 int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
664                 
665                 for (int i = 0; i < optimize.size(); i++) {
666                         if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); }
667                         else if (optimize[i] == "end") { int endcriteriaPercentile = int(endPosition.size() * ((100 - criteria) / (float) 100));  endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();}
668                         else if (optimize[i] == "maxambig") { maxAmbig = ambigBases[criteriaPercentile]; m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + "."); m->mothurOutEndLine(); }
669                         else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); }
670                         else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); }
671                         else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
672                 }
673
674                 return 0;
675         }
676         catch(exception& e) {
677                 m->errorOut(e, "ScreenSeqsCommand", "getSummary");
678                 exit(1);
679         }
680 }
681 /**************************************************************************************/
682 int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair* filePos) {   
683         try {
684                 
685                 ifstream in;
686                 m->openInputFile(filename, in);
687                                 
688                 in.seekg(filePos->start);
689
690                 bool done = false;
691                 int count = 0;
692         
693                 while (!done) {
694                                 
695                         if (m->control_pressed) { in.close(); return 1; }
696                                         
697                         Sequence current(in); m->gobble(in);
698         
699                         if (current.getName() != "") {
700                                 int num = 1;
701                                 if (namefile != "") {
702                                         //make sure this sequence is in the namefile, else error 
703                                         map<string, int>::iterator it = nameMap.find(current.getName());
704                                         
705                                         if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
706                                         else { num = it->second; }
707                                 }
708                                 
709                                 //for each sequence this sequence represents
710                                 for (int i = 0; i < num; i++) {
711                                         startPosition.push_back(current.getStartPos());
712                                         endPosition.push_back(current.getEndPos());
713                                         seqLength.push_back(current.getNumBases());
714                                         ambigBases.push_back(current.getAmbigBases());
715                                         longHomoPolymer.push_back(current.getLongHomoPolymer());
716                                 }
717                                 
718                                 count++;
719                         }
720                         
721                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
722                                 unsigned long int pos = in.tellg();
723                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
724                         #else
725                                 if (in.eof()) { break; }
726                         #endif
727                         
728                 }
729                 
730                 in.close();
731                 
732                 return count;
733         }
734         catch(exception& e) {
735                 m->errorOut(e, "ScreenSeqsCommand", "driverCreateSummary");
736                 exit(1);
737         }
738 }
739 /**************************************************************************************************/
740 int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename) {
741         try {
742 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
743                 int process = 1;
744                 int num = 0;
745                 processIDS.clear();
746                 
747                 //loop through and create all the processes you want
748                 while (process != processors) {
749                         int pid = fork();
750                         
751                         if (pid > 0) {
752                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
753                                 process++;
754                         }else if (pid == 0){
755                                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[process]);
756                                 
757                                 //pass numSeqs to parent
758                                 ofstream out;
759                                 string tempFile = fastafile + toString(getpid()) + ".num.temp";
760                                 m->openOutputFile(tempFile, out);
761                                 
762                                 out << num << endl;
763                                 out << startPosition.size() << endl;
764                                 for (int k = 0; k < startPosition.size(); k++)          {               out << startPosition[k] << '\t'; }  out << endl;
765                                 for (int k = 0; k < endPosition.size(); k++)            {               out << endPosition[k] << '\t'; }  out << endl;
766                                 for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
767                                 for (int k = 0; k < ambigBases.size(); k++)                     {               out << ambigBases[k] << '\t'; }  out << endl;
768                                 for (int k = 0; k < longHomoPolymer.size(); k++)        {               out << longHomoPolymer[k] << '\t'; }  out << endl;
769                                 
770                                 out.close();
771                                 
772                                 exit(0);
773                         }else { 
774                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
775                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
776                                 exit(0);
777                         }
778                 }
779                 
780                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
781                 
782                 //force parent to wait until all the processes are done
783                 for (int i=0;i<processIDS.size();i++) { 
784                         int temp = processIDS[i];
785                         wait(&temp);
786                 }
787                 
788                 //parent reads in and combine Filter info
789                 for (int i = 0; i < processIDS.size(); i++) {
790                         string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
791                         ifstream in;
792                         m->openInputFile(tempFilename, in);
793                         
794                         int temp, tempNum;
795                         in >> tempNum; m->gobble(in); num += tempNum;
796                         in >> tempNum; m->gobble(in);
797                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; startPosition.push_back(temp);              }               m->gobble(in);
798                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; endPosition.push_back(temp);                }               m->gobble(in);
799                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               m->gobble(in);
800                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; ambigBases.push_back(temp);                 }               m->gobble(in);
801                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; longHomoPolymer.push_back(temp);    }               m->gobble(in);
802                                 
803                         in.close();
804                         remove(tempFilename.c_str());
805                 }
806                 
807                 return num;
808 #endif          
809         }
810         catch(exception& e) {
811                 m->errorOut(e, "ScreenSeqsCommand", "createProcessesCreateSummary");
812                 exit(1);
813         }
814 }
815
816 //***************************************************************************************************************
817
818 int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
819         try {
820                 ifstream inputGroups;
821                 m->openInputFile(groupfile, inputGroups);
822                 string seqName, group;
823                 set<string>::iterator it;
824                 
825                 string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + "good" + m->getExtension(groupfile);
826                 outputNames.push_back(goodGroupFile);  outputTypes["group"].push_back(goodGroupFile);
827                 ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
828                 
829                 while(!inputGroups.eof()){
830                         if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); remove(goodGroupFile.c_str()); return 0; }
831
832                         inputGroups >> seqName >> group;
833                         it = badSeqNames.find(seqName);
834                         
835                         if(it != badSeqNames.end()){
836                                 badSeqNames.erase(it);
837                         }
838                         else{
839                                 goodGroupOut << seqName << '\t' << group << endl;
840                         }
841                         m->gobble(inputGroups);
842                 }
843                 
844                 if (m->control_pressed) { goodGroupOut.close();  inputGroups.close(); remove(goodGroupFile.c_str());  return 0; }
845
846                 //we were unable to remove some of the bad sequences
847                 if (badSeqNames.size() != 0) {
848                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
849                                 m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
850                                 m->mothurOutEndLine();
851                         }
852                 }
853                 
854                 inputGroups.close();
855                 goodGroupOut.close();
856                 
857                 if (m->control_pressed) { remove(goodGroupFile.c_str());   }
858                 
859                 return 0;
860         
861         }
862         catch(exception& e) {
863                 m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
864                 exit(1);
865         }
866 }
867
868 //***************************************************************************************************************
869
870 int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
871         try {
872                 ifstream inputAlignReport;
873                 m->openInputFile(alignreport, inputAlignReport);
874                 string seqName, group;
875                 set<string>::iterator it;
876                 
877                 string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + "good" + m->getExtension(alignreport);
878                 outputNames.push_back(goodAlignReportFile);  outputTypes["alignreport"].push_back(goodAlignReportFile);
879                 ofstream goodAlignReportOut;    m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
880
881                 while (!inputAlignReport.eof()) {               //      need to copy header
882                         char c = inputAlignReport.get();
883                         goodAlignReportOut << c;
884                         if (c == 10 || c == 13){        break;  }       
885                 }
886
887                 while(!inputAlignReport.eof()){
888                         if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); remove(goodAlignReportFile.c_str()); return 0; }
889
890                         inputAlignReport >> seqName;
891                         it = badSeqNames.find(seqName);
892                         string line;            
893                         while (!inputAlignReport.eof()) {               //      need to copy header
894                                 char c = inputAlignReport.get();
895                                 line += c;
896                                 if (c == 10 || c == 13){        break;  }       
897                         }
898                         
899                         if(it != badSeqNames.end()){
900                                 badSeqNames.erase(it);
901                         }
902                         else{
903                                 goodAlignReportOut << seqName << '\t' << line;
904                         }
905                         m->gobble(inputAlignReport);
906                 }
907                 
908                 if (m->control_pressed) { goodAlignReportOut.close();  inputAlignReport.close(); remove(goodAlignReportFile.c_str());  return 0; }
909
910                 //we were unable to remove some of the bad sequences
911                 if (badSeqNames.size() != 0) {
912                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
913                                 m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct."); 
914                                 m->mothurOutEndLine();
915                         }
916                 }
917
918                 inputAlignReport.close();
919                 goodAlignReportOut.close();
920                                 
921                 if (m->control_pressed) {  remove(goodAlignReportFile.c_str());  return 0; }
922                 
923                 return 0;
924         
925         }
926         catch(exception& e) {
927                 m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
928                 exit(1);
929         }
930         
931 }
932 //***************************************************************************************************************
933
934 int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
935         try {
936                 ifstream in;
937                 m->openInputFile(qualfile, in);
938                 set<string>::iterator it;
939                 
940                 string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "good" + m->getExtension(qualfile);
941                 outputNames.push_back(goodQualFile);  outputTypes["qfile"].push_back(goodQualFile);
942                 ofstream goodQual;      m->openOutputFile(goodQualFile, goodQual);
943                 
944                 while(!in.eof()){       
945                         
946                         if (m->control_pressed) { goodQual.close(); in.close(); remove(goodQualFile.c_str()); return 0; }
947
948                         string saveName = "";
949                         string name = "";
950                         string scores = "";
951                         
952                         in >> name; 
953                         
954                         if (name.length() != 0) { 
955                                 saveName = name.substr(1);
956                                 while (!in.eof())       {       
957                                         char c = in.get(); 
958                                         if (c == 10 || c == 13){        break;  }
959                                         else { name += c; }     
960                                 } 
961                                 m->gobble(in);
962                         }
963                         
964                         while(in){
965                                 char letter= in.get();
966                                 if(letter == '>'){      in.putback(letter);     break;  }
967                                 else{ scores += letter; }
968                         }
969                         
970                         m->gobble(in);
971                         
972                         it = badSeqNames.find(saveName);
973                         
974                         if(it != badSeqNames.end()){
975                                 badSeqNames.erase(it);
976                         }else{                          
977                                 goodQual << name << endl << scores;
978                         }
979                         
980                         m->gobble(in);
981                 }
982                 
983                 in.close();
984                 goodQual.close();
985                 
986                 //we were unable to remove some of the bad sequences
987                 if (badSeqNames.size() != 0) {
988                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
989                                 m->mothurOut("Your qual file does not include the sequence " + *it + " please correct."); 
990                                 m->mothurOutEndLine();
991                         }
992                 }
993                 
994                 if (m->control_pressed) {  remove(goodQualFile.c_str());  return 0; }
995                 
996                 return 0;
997                 
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "ScreenSeqsCommand", "screenQual");
1001                 exit(1);
1002         }
1003         
1004 }
1005 //**********************************************************************************************************************
1006
1007 int ScreenSeqsCommand::driver(linePair* filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
1008         try {
1009                 ofstream goodFile;
1010                 m->openOutputFile(goodFName, goodFile);
1011                 
1012                 ofstream badAccnosFile;
1013                 m->openOutputFile(badAccnosFName, badAccnosFile);
1014                 
1015                 ifstream inFASTA;
1016                 m->openInputFile(filename, inFASTA);
1017
1018                 inFASTA.seekg(filePos->start);
1019
1020                 bool done = false;
1021                 int count = 0;
1022         
1023                 while (!done) {
1024                 
1025                         if (m->control_pressed) {  return 0; }
1026                         
1027                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
1028                         if (currSeq.getName() != "") {
1029                                 bool goodSeq = 1;               //      innocent until proven guilty
1030                                 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
1031                                 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
1032                                 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
1033                                 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
1034                                 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
1035                                 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
1036                                 
1037                                 if(goodSeq == 1){
1038                                         currSeq.printSequence(goodFile);        
1039                                 }
1040                                 else{
1041                                         badAccnosFile << currSeq.getName() << endl;
1042                                         badSeqNames.insert(currSeq.getName());
1043                                 }
1044                         count++;
1045                         }
1046                         
1047                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1048                                 unsigned long int pos = inFASTA.tellg();
1049                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
1050                         #else
1051                                 if (inFASTA.eof()) { break; }
1052                         #endif
1053                         
1054                         //report progress
1055                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1056                 }
1057                 //report progress
1058                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1059                 
1060                         
1061                 goodFile.close();
1062                 inFASTA.close();
1063                 badAccnosFile.close();
1064                 
1065                 return count;
1066         }
1067         catch(exception& e) {
1068                 m->errorOut(e, "ScreenSeqsCommand", "driver");
1069                 exit(1);
1070         }
1071 }
1072 //**********************************************************************************************************************
1073 #ifdef USE_MPI
1074 int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long int>& MPIPos, set<string>& badSeqNames){
1075         try {
1076                 string outputString = "";
1077                 MPI_Status statusGood; 
1078                 MPI_Status statusBadAccnos; 
1079                 MPI_Status status; 
1080                 int pid;
1081                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1082
1083                 for(int i=0;i<num;i++){
1084                 
1085                         if (m->control_pressed) {  return 0; }
1086                         
1087                         //read next sequence
1088                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1089
1090                         char* buf4 = new char[length];
1091                         memcpy(buf4, outputString.c_str(), length);
1092
1093                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1094                         
1095                         string tempBuf = buf4;  delete buf4;
1096                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1097                         istringstream iss (tempBuf,istringstream::in);
1098                         
1099                         Sequence currSeq(iss);                  
1100                         
1101                         //process seq
1102                         if (currSeq.getName() != "") {
1103                                 bool goodSeq = 1;               //      innocent until proven guilty
1104                                 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
1105                                 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
1106                                 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
1107                                 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
1108                                 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
1109                                 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
1110                                 
1111                                 if(goodSeq == 1){
1112                                         outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
1113                                 
1114                                         //print good seq
1115                                         length = outputString.length();
1116                                         char* buf2 = new char[length];
1117                                         memcpy(buf2, outputString.c_str(), length);
1118                                         
1119                                         MPI_File_write_shared(goodFile, buf2, length, MPI_CHAR, &statusGood);
1120                                         delete buf2;
1121                                 }
1122                                 else{
1123
1124                                         badSeqNames.insert(currSeq.getName());
1125                                         
1126                                         //write to bad accnos file
1127                                         outputString = currSeq.getName() + "\n";
1128                                 
1129                                         length = outputString.length();
1130                                         char* buf3 = new char[length];
1131                                         memcpy(buf3, outputString.c_str(), length);
1132                                         
1133                                         MPI_File_write_shared(badAccnosFile, buf3, length, MPI_CHAR, &statusBadAccnos);
1134                                         delete buf3;
1135                                 }
1136                         }
1137                         
1138                         //report progress
1139                         if((i) % 100 == 0){     m->mothurOut("Processing sequence: " + toString(i)); m->mothurOutEndLine();             }
1140                 }
1141                                 
1142                 return 1;
1143         }
1144         catch(exception& e) {
1145                 m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
1146                 exit(1);
1147         }
1148 }
1149 #endif
1150 /**************************************************************************************************/
1151
1152 int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
1153         try {
1154 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
1155                 int process = 0;
1156                 int num = 0;
1157                 
1158                 //loop through and create all the processes you want
1159                 while (process != processors) {
1160                         int pid = fork();
1161                         
1162                         if (pid > 0) {
1163                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1164                                 process++;
1165                         }else if (pid == 0){
1166                                 num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
1167                                 
1168                                 //pass numSeqs to parent
1169                                 ofstream out;
1170                                 string tempFile = filename + toString(getpid()) + ".num.temp";
1171                                 m->openOutputFile(tempFile, out);
1172                                 out << num << endl;
1173                                 out.close();
1174                                 
1175                                 exit(0);
1176                         }else { 
1177                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1178                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1179                                 exit(0);
1180                         }
1181                 }
1182                 
1183                 //force parent to wait until all the processes are done
1184                 for (int i=0;i<processors;i++) { 
1185                         int temp = processIDS[i];
1186                         wait(&temp);
1187                 }
1188                 
1189                 for (int i = 0; i < processIDS.size(); i++) {
1190                         ifstream in;
1191                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1192                         m->openInputFile(tempFile, in);
1193                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1194                         in.close(); remove(tempFile.c_str());
1195                 }
1196                 
1197                 return num;
1198 #endif          
1199         }
1200         catch(exception& e) {
1201                 m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
1202                 exit(1);
1203         }
1204 }
1205
1206 //***************************************************************************************************************
1207
1208