]> git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
added count file to get.oturep, pre.cluster, screen.seqs, tree.shared. made remove...
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "counttable.h"
12
13 //**********************************************************************************************************************
14 vector<string> ScreenSeqsCommand::setParameters(){      
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pqfile);
21                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(palignreport);
22                 CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(ptax);
23                 CommandParameter pstart("start", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pstart);
24                 CommandParameter pend("end", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pend);
25                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxambig);
26                 CommandParameter pmaxhomop("maxhomop", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxhomop);
27                 CommandParameter pminlength("minlength", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pminlength);
28                 CommandParameter pmaxlength("maxlength", "Number", "", "-1", "", "", "",false,false); parameters.push_back(pmaxlength);
29                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
30                 CommandParameter pcriteria("criteria", "Number", "", "90", "", "", "",false,false); parameters.push_back(pcriteria);
31                 CommandParameter poptimize("optimize", "Multiple", "none-start-end-maxambig-maxhomop-minlength-maxlength", "none", "", "", "",true,false); parameters.push_back(poptimize);
32                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
33                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
34                 
35                 vector<string> myArray;
36                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
37                 return myArray;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "ScreenSeqsCommand", "setParameters");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 string ScreenSeqsCommand::getHelpString(){      
46         try {
47                 string helpString = "";
48                 helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
49                 helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
50                 helpString += "The fasta parameter is required.\n";
51                 helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
52                 helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
53                 helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n";
54                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
55                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
56                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
57                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
58                 helpString += "The processors parameter allows you to specify the number of processors to use while running the command. The default is 1.\n";
59                 helpString += "The optimize and criteria parameters allow you set the start, end, maxabig, maxhomop, minlength and maxlength parameters relative to your set of sequences .\n";
60                 helpString += "For example optimize=start-end, criteria=90, would set the start and end values to the position 90% of your sequences started and ended.\n";
61                 helpString += "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n";
62                 helpString += "The screen.seqs command should be in the following format: \n";
63                 helpString += "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig,  \n";
64                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
65                 helpString += "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
66                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
67                 return helpString;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "ScreenSeqsCommand", "getHelpString");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 string ScreenSeqsCommand::getOutputFileNameTag(string type, string inputName=""){       
76         try {
77         string outputFileName = "";
78                 map<string, vector<string> >::iterator it;
79         
80         //is this a type this command creates
81         it = outputTypes.find(type);
82         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
83         else {
84             if (type == "fasta")            {   outputFileName =  "good" + m->getExtension(inputName);   }
85             else if (type == "taxonomy")    {   outputFileName =  "good" + m->getExtension(inputName);   }
86             else if (type == "name")        {   outputFileName =  "good" + m->getExtension(inputName);   }
87             else if (type == "count")        {   outputFileName =  "good" + m->getExtension(inputName);   }
88             else if (type == "group")       {   outputFileName =  "good" + m->getExtension(inputName);   }
89             else if (type == "accnos")      {   outputFileName =  "bad.accnos";   }
90             else if (type == "qfile")       {   outputFileName =  "good" + m->getExtension(inputName);   }
91             else if (type == "alignreport") {   outputFileName =  "good.align.report";                   }
92             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
93         }
94         return outputFileName;
95         }
96         catch(exception& e) {
97                 m->errorOut(e, "ScreenSeqsCommand", "getOutputFileNameTag");
98                 exit(1);
99         }
100 }
101
102 //**********************************************************************************************************************
103 ScreenSeqsCommand::ScreenSeqsCommand(){ 
104         try {
105                 abort = true; calledHelp = true; 
106                 setParameters();
107                 vector<string> tempOutNames;
108                 outputTypes["fasta"] = tempOutNames;
109                 outputTypes["name"] = tempOutNames;
110                 outputTypes["group"] = tempOutNames;
111                 outputTypes["alignreport"] = tempOutNames;
112                 outputTypes["accnos"] = tempOutNames;
113                 outputTypes["qfile"] = tempOutNames;
114                 outputTypes["taxonomy"] = tempOutNames;
115         outputTypes["count"] = tempOutNames;
116         }
117         catch(exception& e) {
118                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
119                 exit(1);
120         }
121 }
122 //***************************************************************************************************************
123
124 ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
125         try {
126                 abort = false; calledHelp = false;   
127                 
128                 //allow user to run help
129                 if(option == "help") { help(); abort = true; calledHelp = true; }
130                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
131                 
132                 else {
133                         vector<string> myArray = setParameters();
134                         
135                         OptionParser parser(option);
136                         map<string,string> parameters = parser.getParameters();
137                         
138                         ValidParameters validParameter("screen.seqs");
139                         map<string,string>::iterator it;
140                         
141                         //check to make sure all parameters are valid for command
142                         for (it = parameters.begin(); it != parameters.end(); it++) { 
143                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
144                         }
145                         
146                         //initialize outputTypes
147                         vector<string> tempOutNames;
148                         outputTypes["fasta"] = tempOutNames;
149                         outputTypes["name"] = tempOutNames;
150                         outputTypes["group"] = tempOutNames;
151                         outputTypes["alignreport"] = tempOutNames;
152                         outputTypes["accnos"] = tempOutNames;
153                         outputTypes["qfile"] = tempOutNames;
154                         outputTypes["taxonomy"] = tempOutNames;
155             outputTypes["count"] = tempOutNames;
156                         
157                         //if the user changes the input directory command factory will send this info to us in the output parameter 
158                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
159                         if (inputDir == "not found"){   inputDir = "";          }
160                         else {
161                                 string path;
162                                 it = parameters.find("fasta");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
168                                 }
169                                 
170                                 it = parameters.find("group");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
176                                 }
177                                 
178                                 it = parameters.find("name");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
184                                 }
185                                 
186                                 it = parameters.find("alignreport");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
192                                 }
193                                 
194                                 it = parameters.find("qfile");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
200                                 }
201                                 
202                                 it = parameters.find("taxonomy");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
208                                 }
209                 
210                 it = parameters.find("count");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
216                                 }
217                         }
218
219                         //check for required parameters
220                         fastafile = validParameter.validFile(parameters, "fasta", true);
221                         if (fastafile == "not found") {                         
222                                 fastafile = m->getFastaFile(); 
223                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
224                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
225                         }
226                         else if (fastafile == "not open") { abort = true; }
227                         else { m->setFastaFile(fastafile); }
228         
229                         groupfile = validParameter.validFile(parameters, "group", true);
230                         if (groupfile == "not open") { abort = true; }  
231                         else if (groupfile == "not found") { groupfile = ""; }
232                         else { m->setGroupFile(groupfile); }
233                         
234                         qualfile = validParameter.validFile(parameters, "qfile", true);
235                         if (qualfile == "not open") { abort = true; }   
236                         else if (qualfile == "not found") { qualfile = ""; }
237                         else { m->setQualFile(qualfile); }
238                         
239                         namefile = validParameter.validFile(parameters, "name", true);
240                         if (namefile == "not open") { namefile = ""; abort = true; }
241                         else if (namefile == "not found") { namefile = ""; }    
242                         else { m->setNameFile(namefile); }
243                         
244             countfile = validParameter.validFile(parameters, "count", true);
245                         if (countfile == "not open") { countfile = ""; abort = true; }
246                         else if (countfile == "not found") { countfile = "";  } 
247                         else { m->setCountTableFile(countfile); }
248             
249             if ((namefile != "") && (countfile != "")) {
250                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
251             }
252                         
253             if ((groupfile != "") && (countfile != "")) {
254                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
255             }
256             
257                         alignreport = validParameter.validFile(parameters, "alignreport", true);
258                         if (alignreport == "not open") { abort = true; }
259                         else if (alignreport == "not found") { alignreport = ""; }
260                         
261                         taxonomy = validParameter.validFile(parameters, "taxonomy", true);
262                         if (taxonomy == "not open") { abort = true; }
263                         else if (taxonomy == "not found") { taxonomy = ""; }    
264                         
265                         //if the user changes the output directory command factory will send this info to us in the output parameter 
266                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
267                                 outputDir = ""; 
268                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
269                         }
270
271                         //check for optional parameter and set defaults
272                         // ...at some point should added some additional type checking...
273                         string temp;
274                         temp = validParameter.validFile(parameters, "start", false);            if (temp == "not found") { temp = "-1"; }
275                         m->mothurConvert(temp, startPos); 
276                 
277                         temp = validParameter.validFile(parameters, "end", false);                      if (temp == "not found") { temp = "-1"; }
278                         m->mothurConvert(temp, endPos);  
279
280                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
281                         m->mothurConvert(temp, maxAmbig);  
282
283                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "-1"; }
284                         m->mothurConvert(temp, maxHomoP);  
285
286                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "-1"; }
287                         m->mothurConvert(temp, minLength); 
288                         
289                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "-1"; }
290                         m->mothurConvert(temp, maxLength); 
291                         
292                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
293                         m->setProcessors(temp);
294                         m->mothurConvert(temp, processors);
295                         
296                         temp = validParameter.validFile(parameters, "optimize", false); //optimizing trumps the optimized values original value
297                         if (temp == "not found"){       temp = "none";          }
298                         m->splitAtDash(temp, optimize);         
299                         
300                         //check for invalid optimize options
301                         set<string> validOptimizers;
302                         validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength");
303                         for (int i = 0; i < optimize.size(); i++) { 
304                                 if (validOptimizers.count(optimize[i]) == 0) { 
305                                         m->mothurOut(optimize[i] + " is not a valid optimizer. Valid options are start, end, maxambig, maxhomop, minlength and maxlength."); m->mothurOutEndLine();
306                                         optimize.erase(optimize.begin()+i);
307                                         i--;
308                                 }
309                         }
310                         
311                         if (optimize.size() == 1) { if (optimize[0] == "none") { optimize.clear(); } }
312                         
313                         temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
314                         m->mothurConvert(temp, criteria); 
315                         
316                         if (countfile == "") { 
317                 if (namefile == "") {
318                     vector<string> files; files.push_back(fastafile);
319                     parser.getNameFile(files);
320                 }
321             }
322                 }
323
324         }
325         catch(exception& e) {
326                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
327                 exit(1);
328         }
329 }
330 //***************************************************************************************************************
331
332 int ScreenSeqsCommand::execute(){
333         try{
334                 
335                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
336                 
337                 //if the user want to optimize we need to know the 90% mark
338                 vector<unsigned long long> positions;
339                 if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
340                         //use the namefile to optimize correctly
341                         if (namefile != "") { nameMap = m->readNames(namefile); }
342             else if (countfile != "") {
343                 CountTable ct;
344                 ct.readTable(countfile);
345                 nameMap = ct.getNameMap();
346             }
347                         getSummary(positions); 
348                 } 
349                 else { 
350                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
351                 positions = m->divideFile(fastafile, processors);
352                 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
353                         #else 
354                 if(processors == 1){ lines.push_back(linePair(0, 1000));  }
355                 else {
356                     int numFastaSeqs = 0;
357                     positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
358                     if (positions.size() < processors) { processors = positions.size(); }
359                 
360                     //figure out how many sequences you have to process
361                     int numSeqsPerProcessor = numFastaSeqs / processors;
362                     for (int i = 0; i < processors; i++) {
363                         int startIndex =  i * numSeqsPerProcessor;
364                         if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
365                         lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
366                     }
367                 }
368                         #endif
369                 }
370                                         
371                 string goodSeqFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
372                 string badAccnosFile =  outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("accnos");
373                 
374                 int numFastaSeqs = 0;
375                 set<string> badSeqNames;
376                 int start = time(NULL);
377         
378 #ifdef USE_MPI  
379                         int pid, numSeqsPerProcessor; 
380                         int tag = 2001;
381                         vector<unsigned long long> MPIPos;
382                         
383                         MPI_Status status; 
384                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
385                         MPI_Comm_size(MPI_COMM_WORLD, &processors); 
386         
387                         MPI_File inMPI;
388                         MPI_File outMPIGood;
389                         MPI_File outMPIBadAccnos;
390                         
391                         int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
392                         int inMode=MPI_MODE_RDONLY; 
393                         
394                         char outGoodFilename[1024];
395                         strcpy(outGoodFilename, goodSeqFile.c_str());
396
397                         char outBadAccnosFilename[1024];
398                         strcpy(outBadAccnosFilename, badAccnosFile.c_str());
399
400                         char inFileName[1024];
401                         strcpy(inFileName, fastafile.c_str());
402                         
403                         MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
404                         MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
405                         MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
406                         
407                         if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood); MPI_File_close(&outMPIBadAccnos); return 0; }
408                         
409                         if (pid == 0) { //you are the root process 
410                                 
411                                 MPIPos = m->setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
412                                 
413                                 //send file positions to all processes
414                                 for(int i = 1; i < processors; i++) { 
415                                         MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
416                                         MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
417                                 }
418                                 
419                                 //figure out how many sequences you have to align
420                                 numSeqsPerProcessor = numFastaSeqs / processors;
421                                 int startIndex =  pid * numSeqsPerProcessor;
422                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
423
424                                 //align your part
425                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
426
427                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
428
429                                 for (int i = 1; i < processors; i++) {
430                                         //get bad lists
431                                         int badSize;
432                                         MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
433                                 }
434                         }else{ //you are a child process
435                                 MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
436                                 MPIPos.resize(numFastaSeqs+1);
437                                 MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
438
439                                 //figure out how many sequences you have to align
440                                 numSeqsPerProcessor = numFastaSeqs / processors;
441                                 int startIndex =  pid * numSeqsPerProcessor;
442                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
443
444                                 //align your part
445                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
446
447                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
448                                 
449                                 //send bad list 
450                                 int badSize = badSeqNames.size();
451                                 MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
452                         }
453                         
454                         //close files 
455                         MPI_File_close(&inMPI);
456                         MPI_File_close(&outMPIGood);
457                         MPI_File_close(&outMPIBadAccnos);
458                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
459                                         
460 #else
461         if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);       }       
462         else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); }
463         
464         if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
465 #endif          
466
467                 #ifdef USE_MPI
468                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
469                                         
470                         if (pid == 0) { //only one process should fix files
471                         
472                                 //read accnos file with all names in it, process 0 just has its names
473                                 MPI_File inMPIAccnos;
474                                 MPI_Offset size;
475                         
476                                 char inFileName[1024];
477                                 strcpy(inFileName, badAccnosFile.c_str());
478                         
479                                 MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
480                                 MPI_File_get_size(inMPIAccnos, &size);
481                         
482                                 char* buffer = new char[size];
483                                 MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
484                         
485                                 string tempBuf = buffer;
486                                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
487                                 istringstream iss (tempBuf,istringstream::in);
488
489                                 delete buffer;
490                                 MPI_File_close(&inMPIAccnos);
491                                 
492                                 badSeqNames.clear();
493                                 string tempName;
494                                 while (!iss.eof()) {
495                                         iss >> tempName; m->gobble(iss);
496                                         badSeqNames.insert(tempName);
497                                 }
498                 #endif
499                                                                                                                                                                         
500                 if(namefile != "" && groupfile != "")   {       
501                         screenNameGroupFile(badSeqNames);       
502                         if (m->control_pressed) {  m->mothurRemove(goodSeqFile); return 0; }
503                 }else if(namefile != "")        {       
504                         screenNameGroupFile(badSeqNames);
505                         if (m->control_pressed) {  m->mothurRemove(goodSeqFile);  return 0; }   
506                 }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
507                 else if (countfile != "") {     screenCountFile(badSeqNames);           }
508             
509                 
510                 if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
511
512                 if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
513                 if(qualfile != "")                                              {       screenQual(badSeqNames);                        }
514                 if(taxonomy != "")                                              {       screenTaxonomy(badSeqNames);            }
515                 
516                 if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
517                 
518                 #ifdef USE_MPI
519                         }
520                 #endif
521
522                 m->mothurOutEndLine();
523                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
524                 m->mothurOut(goodSeqFile); m->mothurOutEndLine();       outputTypes["fasta"].push_back(goodSeqFile);
525                 m->mothurOut(badAccnosFile); m->mothurOutEndLine();      outputTypes["accnos"].push_back(badAccnosFile);
526                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
527                 m->mothurOutEndLine();
528                 m->mothurOutEndLine();
529                 
530                 //set fasta file as new current fastafile
531                 string current = "";
532                 itTypes = outputTypes.find("fasta");
533                 if (itTypes != outputTypes.end()) {
534                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
535                 }
536                 
537                 itTypes = outputTypes.find("name");
538                 if (itTypes != outputTypes.end()) {
539                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
540                 }
541                 
542                 itTypes = outputTypes.find("group");
543                 if (itTypes != outputTypes.end()) {
544                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
545                 }
546                 
547                 itTypes = outputTypes.find("qfile");
548                 if (itTypes != outputTypes.end()) {
549                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
550                 }
551                 
552                 itTypes = outputTypes.find("taxonomy");
553                 if (itTypes != outputTypes.end()) {
554                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
555                 }
556         
557         itTypes = outputTypes.find("count");
558                 if (itTypes != outputTypes.end()) {
559                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
560                 }
561
562                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
563                 m->mothurOutEndLine();
564
565                 return 0;
566         }
567         catch(exception& e) {
568                 m->errorOut(e, "ScreenSeqsCommand", "execute");
569                 exit(1);
570         }
571 }
572
573 //***************************************************************************************************************
574
575 int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
576         try {
577                 ifstream inputNames;
578                 m->openInputFile(namefile, inputNames);
579                 set<string> badSeqGroups;
580                 string seqName, seqList, group;
581                 set<string>::iterator it;
582
583                 string goodNameFile = outputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
584                 outputNames.push_back(goodNameFile);  outputTypes["name"].push_back(goodNameFile);
585                 
586                 ofstream goodNameOut;   m->openOutputFile(goodNameFile, goodNameOut);
587                 
588                 while(!inputNames.eof()){
589                         if (m->control_pressed) { goodNameOut.close();  inputNames.close(); m->mothurRemove(goodNameFile);  return 0; }
590
591                         inputNames >> seqName >> seqList;
592                         it = badSeqNames.find(seqName);
593                                 
594                         if(it != badSeqNames.end()){
595                                 badSeqNames.erase(it);
596                                 
597                                 if(namefile != ""){
598                                         int start = 0;
599                                         for(int i=0;i<seqList.length();i++){
600                                                 if(seqList[i] == ','){
601                                                         badSeqGroups.insert(seqList.substr(start,i-start));
602                                                         start = i+1;
603                                                 }                                       
604                                         }
605                                         badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
606                                 }
607                         }
608                         else{
609                                 goodNameOut << seqName << '\t' << seqList << endl;
610                         }
611                         m->gobble(inputNames);
612                 }
613                 inputNames.close();
614                 goodNameOut.close();
615         
616                 //we were unable to remove some of the bad sequences
617                 if (badSeqNames.size() != 0) {
618                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
619                                 m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
620                                 m->mothurOutEndLine();
621                         }
622                 }
623
624                 if(groupfile != ""){
625                         
626                         ifstream inputGroups;
627                         m->openInputFile(groupfile, inputGroups);
628
629                         string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
630                         outputNames.push_back(goodGroupFile);   outputTypes["group"].push_back(goodGroupFile);
631                         
632                         ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
633                         
634                         while(!inputGroups.eof()){
635                                 if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile);  m->mothurRemove(goodGroupFile); return 0; }
636
637                                 inputGroups >> seqName >> group;
638                                 
639                                 it = badSeqGroups.find(seqName);
640                                 
641                                 if(it != badSeqGroups.end()){
642                                         badSeqGroups.erase(it);
643                                 }
644                                 else{
645                                         goodGroupOut << seqName << '\t' << group << endl;
646                                 }
647                                 m->gobble(inputGroups);
648                         }
649                         inputGroups.close();
650                         goodGroupOut.close();
651                         
652                         //we were unable to remove some of the bad sequences
653                         if (badSeqGroups.size() != 0) {
654                                 for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
655                                         m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
656                                         m->mothurOutEndLine();
657                                 }
658                         }
659                 }
660                 
661                 
662                 return 0;
663         
664         }
665         catch(exception& e) {
666                 m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
667                 exit(1);
668         }
669 }
670 //***************************************************************************************************************
671 int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
672         try {
673                 
674                 vector<int> startPosition;
675                 vector<int> endPosition;
676                 vector<int> seqLength;
677                 vector<int> ambigBases;
678                 vector<int> longHomoPolymer;
679                 
680         vector<unsigned long long> positions;
681 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
682                 positions = m->divideFile(fastafile, processors);
683                 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }   
684 #else
685                 if(processors == 1){ lines.push_back(linePair(0, 1000));  }
686         else {
687             int numFastaSeqs = 0;
688             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
689             if (positions.size() < processors) { processors = positions.size(); }
690             
691             //figure out how many sequences you have to process
692             int numSeqsPerProcessor = numFastaSeqs / processors;
693             for (int i = 0; i < processors; i++) {
694                 int startIndex =  i * numSeqsPerProcessor;
695                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
696                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
697             }
698         }
699 #endif
700                 
701 #ifdef USE_MPI
702                 int pid;
703                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
704                 
705                 if (pid == 0) { 
706                         driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
707 #else
708                 int numSeqs = 0;
709                 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
710                         if(processors == 1){
711                                 numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
712                         }else{
713                                 numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile); 
714                         }
715                                 
716                         if (m->control_pressed) {  return 0; }
717                 //#else
718                 //      numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
719                 //      if (m->control_pressed) {  return 0; }
720                 //#endif
721 #endif
722                 sort(startPosition.begin(), startPosition.end());
723                 sort(endPosition.begin(), endPosition.end());
724                 sort(seqLength.begin(), seqLength.end());
725                 sort(ambigBases.begin(), ambigBases.end());
726                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
727                 
728                 //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
729                 int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
730                 
731                 for (int i = 0; i < optimize.size(); i++) {
732                         if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); }
733                         else if (optimize[i] == "end") { int endcriteriaPercentile = int(endPosition.size() * ((100 - criteria) / (float) 100));  endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();}
734                         else if (optimize[i] == "maxambig") { maxAmbig = ambigBases[criteriaPercentile]; m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + "."); m->mothurOutEndLine(); }
735                         else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); }
736                         else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); }
737                         else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
738                 }
739
740 #ifdef USE_MPI
741                 }
742                 
743                 MPI_Status status; 
744                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
745                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
746                         
747                 if (pid == 0) { 
748                         //send file positions to all processes
749                         for(int i = 1; i < processors; i++) { 
750                                 MPI_Send(&startPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
751                                 MPI_Send(&endPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
752                                 MPI_Send(&maxAmbig, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
753                                 MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
754                                 MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
755                                 MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
756                         }
757                 }else {
758                         MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
759                         MPI_Recv(&endPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
760                         MPI_Recv(&maxAmbig, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
761                         MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
762                         MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
763                         MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
764                 }
765                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
766 #endif
767                 return 0;
768         }
769         catch(exception& e) {
770                 m->errorOut(e, "ScreenSeqsCommand", "getSummary");
771                 exit(1);
772         }
773 }
774 /**************************************************************************************/
775 int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair filePos) {    
776         try {
777                 
778                 ifstream in;
779                 m->openInputFile(filename, in);
780                                 
781                 in.seekg(filePos.start);
782
783                 bool done = false;
784                 int count = 0;
785         
786                 while (!done) {
787                                 
788                         if (m->control_pressed) { in.close(); return 1; }
789                                         
790                         Sequence current(in); m->gobble(in);
791         
792                         if (current.getName() != "") {
793                                 int num = 1;
794                                 if (namefile != "") {
795                                         //make sure this sequence is in the namefile, else error 
796                                         map<string, int>::iterator it = nameMap.find(current.getName());
797                                         
798                                         if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
799                                         else { num = it->second; }
800                                 }
801                                 
802                                 //for each sequence this sequence represents
803                                 for (int i = 0; i < num; i++) {
804                                         startPosition.push_back(current.getStartPos());
805                                         endPosition.push_back(current.getEndPos());
806                                         seqLength.push_back(current.getNumBases());
807                                         ambigBases.push_back(current.getAmbigBases());
808                                         longHomoPolymer.push_back(current.getLongHomoPolymer());
809                                 }
810                                 
811                                 count++;
812                         }
813                         //if((count) % 100 == 0){       m->mothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine();         }
814                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
815                                 unsigned long long pos = in.tellg();
816                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
817                         #else
818                                 if (in.eof()) { break; }
819                         #endif
820                         
821                 }
822                 
823                 in.close();
824                 
825                 return count;
826         }
827         catch(exception& e) {
828                 m->errorOut(e, "ScreenSeqsCommand", "driverCreateSummary");
829                 exit(1);
830         }
831 }
832 /**************************************************************************************************/
833 int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename) {
834         try {
835         
836         int process = 1;
837                 int num = 0;
838                 vector<int> processIDS;
839
840 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
841                                 
842                 //loop through and create all the processes you want
843                 while (process != processors) {
844                         int pid = fork();
845                         
846                         if (pid > 0) {
847                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
848                                 process++;
849                         }else if (pid == 0){
850                                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[process]);
851                                 
852                                 //pass numSeqs to parent
853                                 ofstream out;
854                                 string tempFile = fastafile + toString(getpid()) + ".num.temp";
855                                 m->openOutputFile(tempFile, out);
856                                 
857                                 out << num << endl;
858                                 out << startPosition.size() << endl;
859                                 for (int k = 0; k < startPosition.size(); k++)          {               out << startPosition[k] << '\t'; }  out << endl;
860                                 for (int k = 0; k < endPosition.size(); k++)            {               out << endPosition[k] << '\t'; }  out << endl;
861                                 for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
862                                 for (int k = 0; k < ambigBases.size(); k++)                     {               out << ambigBases[k] << '\t'; }  out << endl;
863                                 for (int k = 0; k < longHomoPolymer.size(); k++)        {               out << longHomoPolymer[k] << '\t'; }  out << endl;
864                                 
865                                 out.close();
866                                 
867                                 exit(0);
868                         }else { 
869                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
870                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
871                                 exit(0);
872                         }
873                 }
874                 
875                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
876                 
877                 //force parent to wait until all the processes are done
878                 for (int i=0;i<processIDS.size();i++) { 
879                         int temp = processIDS[i];
880                         wait(&temp);
881                 }
882                 
883                 //parent reads in and combine Filter info
884                 for (int i = 0; i < processIDS.size(); i++) {
885                         string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
886                         ifstream in;
887                         m->openInputFile(tempFilename, in);
888                         
889                         int temp, tempNum;
890                         in >> tempNum; m->gobble(in); num += tempNum;
891                         in >> tempNum; m->gobble(in);
892                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; startPosition.push_back(temp);              }               m->gobble(in);
893                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; endPosition.push_back(temp);                }               m->gobble(in);
894                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               m->gobble(in);
895                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; ambigBases.push_back(temp);                 }               m->gobble(in);
896                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; longHomoPolymer.push_back(temp);    }               m->gobble(in);
897                                 
898                         in.close();
899                         m->mothurRemove(tempFilename);
900                 }
901                 
902                 
903 #else 
904         //////////////////////////////////////////////////////////////////////////////////////////////////////
905                 //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
906                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
907                 //Taking advantage of shared memory to allow both threads to add info to vectors.
908                 //////////////////////////////////////////////////////////////////////////////////////////////////////
909                 
910                 vector<sumData*> pDataArray; 
911                 DWORD   dwThreadIdArray[processors-1];
912                 HANDLE  hThreadArray[processors-1]; 
913                 
914                 //Create processor worker threads.
915                 for( int i=0; i<processors-1; i++ ){
916             
917                         // Allocate memory for thread data.
918                         sumData* tempSum = new sumData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
919                         pDataArray.push_back(tempSum);
920                         
921                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
922                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
923                         hThreadArray[i] = CreateThread(NULL, 0, MySumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
924                 }
925                 
926         //do your part
927                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[processors-1]);
928          
929                 //Wait until all threads have terminated.
930                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
931                 
932                 //Close all thread handles and free memory allocations.
933                 for(int i=0; i < pDataArray.size(); i++){
934                         num += pDataArray[i]->count;
935             for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {     startPosition.push_back(pDataArray[i]->startPosition[k]);       }
936                         for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
937             for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) { seqLength.push_back(pDataArray[i]->seqLength[k]);       }
938             for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {        ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
939             for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {   longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
940                         CloseHandle(hThreadArray[i]);
941                         delete pDataArray[i];
942                 }
943
944 #endif          
945         return num;
946         }
947         catch(exception& e) {
948                 m->errorOut(e, "ScreenSeqsCommand", "createProcessesCreateSummary");
949                 exit(1);
950         }
951 }
952
953 //***************************************************************************************************************
954
955 int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
956         try {
957                 ifstream inputGroups;
958                 m->openInputFile(groupfile, inputGroups);
959                 string seqName, group;
960                 set<string>::iterator it;
961                 
962                 string goodGroupFile = outputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
963         outputNames.push_back(goodGroupFile);  outputTypes["group"].push_back(goodGroupFile);
964                 ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
965                 
966                 while(!inputGroups.eof()){
967                         if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
968
969                         inputGroups >> seqName >> group;
970                         it = badSeqNames.find(seqName);
971                         
972                         if(it != badSeqNames.end()){
973                                 badSeqNames.erase(it);
974                         }
975                         else{
976                                 goodGroupOut << seqName << '\t' << group << endl;
977                         }
978                         m->gobble(inputGroups);
979                 }
980                 
981                 if (m->control_pressed) { goodGroupOut.close();  inputGroups.close(); m->mothurRemove(goodGroupFile);  return 0; }
982
983                 //we were unable to remove some of the bad sequences
984                 if (badSeqNames.size() != 0) {
985                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
986                                 m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
987                                 m->mothurOutEndLine();
988                         }
989                 }
990                 
991                 inputGroups.close();
992                 goodGroupOut.close();
993                 
994                 if (m->control_pressed) { m->mothurRemove(goodGroupFile);   }
995                 
996                 return 0;
997         
998         }
999         catch(exception& e) {
1000                 m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
1001                 exit(1);
1002         }
1003 }
1004 //***************************************************************************************************************
1005 int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
1006         try {
1007                 ifstream in;
1008                 m->openInputFile(countfile, in);
1009                 set<string>::iterator it;
1010                 
1011                 string goodCountFile = outputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
1012         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1013                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1014                 
1015         string headers = m->getline(in); m->gobble(in);
1016         goodCountOut << headers << endl;
1017         
1018         string name, rest; int thisTotal;
1019         while (!in.eof()) {
1020
1021                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1022             
1023                         in >> name; m->gobble(in); 
1024             in >> thisTotal; m->gobble(in);
1025             rest = m->getline(in); m->gobble(in);
1026             
1027                         it = badSeqNames.find(name);
1028                         
1029                         if(it != badSeqNames.end()){
1030                                 badSeqNames.erase(it);
1031                         }
1032                         else{
1033                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1034                         }
1035                 }
1036                 
1037                 if (m->control_pressed) { goodCountOut.close();  in.close(); m->mothurRemove(goodCountFile);  return 0; }
1038         
1039                 //we were unable to remove some of the bad sequences
1040                 if (badSeqNames.size() != 0) {
1041                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1042                                 m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
1043                                 m->mothurOutEndLine();
1044                         }
1045                 }
1046                 
1047                 in.close();
1048                 goodCountOut.close();
1049         
1050         //check for groups that have been eliminated
1051         CountTable ct;
1052         if (ct.testGroups(goodCountFile)) {
1053             ct.readTable(goodCountFile);
1054             ct.printTable(goodCountFile);
1055         }
1056                 
1057                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1058                 
1059                 return 0;
1060         
1061         }
1062         catch(exception& e) {
1063                 m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
1064                 exit(1);
1065         }
1066 }
1067 //***************************************************************************************************************
1068
1069 int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
1070         try {
1071                 ifstream inputAlignReport;
1072                 m->openInputFile(alignreport, inputAlignReport);
1073                 string seqName, group;
1074                 set<string>::iterator it;
1075                 
1076                 string goodAlignReportFile = outputDir + m->getRootName(m->getSimpleName(alignreport)) + getOutputFileNameTag("alignreport");
1077                 outputNames.push_back(goodAlignReportFile);  outputTypes["alignreport"].push_back(goodAlignReportFile);
1078                 ofstream goodAlignReportOut;    m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
1079
1080                 while (!inputAlignReport.eof()) {               //      need to copy header
1081                         char c = inputAlignReport.get();
1082                         goodAlignReportOut << c;
1083                         if (c == 10 || c == 13){        break;  }       
1084                 }
1085
1086                 while(!inputAlignReport.eof()){
1087                         if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); m->mothurRemove(goodAlignReportFile); return 0; }
1088
1089                         inputAlignReport >> seqName;
1090                         it = badSeqNames.find(seqName);
1091                         string line;            
1092                         while (!inputAlignReport.eof()) {               //      need to copy header
1093                                 char c = inputAlignReport.get();
1094                                 line += c;
1095                                 if (c == 10 || c == 13){        break;  }       
1096                         }
1097                         
1098                         if(it != badSeqNames.end()){
1099                                 badSeqNames.erase(it);
1100                         }
1101                         else{
1102                                 goodAlignReportOut << seqName << '\t' << line;
1103                         }
1104                         m->gobble(inputAlignReport);
1105                 }
1106                 
1107                 if (m->control_pressed) { goodAlignReportOut.close();  inputAlignReport.close(); m->mothurRemove(goodAlignReportFile);  return 0; }
1108
1109                 //we were unable to remove some of the bad sequences
1110                 if (badSeqNames.size() != 0) {
1111                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1112                                 m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct."); 
1113                                 m->mothurOutEndLine();
1114                         }
1115                 }
1116
1117                 inputAlignReport.close();
1118                 goodAlignReportOut.close();
1119                                 
1120                 if (m->control_pressed) {  m->mothurRemove(goodAlignReportFile);  return 0; }
1121                 
1122                 return 0;
1123         
1124         }
1125         catch(exception& e) {
1126                 m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
1127                 exit(1);
1128         }
1129         
1130 }
1131 //***************************************************************************************************************
1132
1133 int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
1134         try {
1135                 ifstream input;
1136                 m->openInputFile(taxonomy, input);
1137                 string seqName, tax;
1138                 set<string>::iterator it;
1139                 
1140                 string goodTaxFile = outputDir + m->getRootName(m->getSimpleName(taxonomy)) + getOutputFileNameTag("taxonomy", taxonomy);
1141                 outputNames.push_back(goodTaxFile);  outputTypes["taxonomy"].push_back(goodTaxFile);
1142                 ofstream goodTaxOut;    m->openOutputFile(goodTaxFile, goodTaxOut);
1143                                 
1144                 while(!input.eof()){
1145                         if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
1146                         
1147                         input >> seqName >> tax;
1148                         it = badSeqNames.find(seqName);
1149                         
1150                         if(it != badSeqNames.end()){ badSeqNames.erase(it); }
1151                         else{
1152                                 goodTaxOut << seqName << '\t' << tax << endl;
1153                         }
1154                         m->gobble(input);
1155                 }
1156                 
1157                 if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
1158                 
1159                 //we were unable to remove some of the bad sequences
1160                 if (badSeqNames.size() != 0) {
1161                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1162                                 m->mothurOut("Your taxonomy file does not include the sequence " + *it + " please correct."); 
1163                                 m->mothurOutEndLine();
1164                         }
1165                 }
1166                 
1167                 input.close();
1168                 goodTaxOut.close();
1169                 
1170                 if (m->control_pressed) {  m->mothurRemove(goodTaxFile);  return 0; }
1171                 
1172                 return 0;
1173                 
1174         }
1175         catch(exception& e) {
1176                 m->errorOut(e, "ScreenSeqsCommand", "screenTaxonomy");
1177                 exit(1);
1178         }
1179         
1180 }
1181 //***************************************************************************************************************
1182
1183 int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
1184         try {
1185                 ifstream in;
1186                 m->openInputFile(qualfile, in);
1187                 set<string>::iterator it;
1188                 
1189                 string goodQualFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + getOutputFileNameTag("qfile", qualfile);
1190                 outputNames.push_back(goodQualFile);  outputTypes["qfile"].push_back(goodQualFile);
1191                 ofstream goodQual;      m->openOutputFile(goodQualFile, goodQual);
1192                 
1193                 while(!in.eof()){       
1194                         
1195                         if (m->control_pressed) { goodQual.close(); in.close(); m->mothurRemove(goodQualFile); return 0; }
1196
1197                         string saveName = "";
1198                         string name = "";
1199                         string scores = "";
1200                         
1201                         in >> name; 
1202                         
1203                         if (name.length() != 0) { 
1204                                 saveName = name.substr(1);
1205                                 while (!in.eof())       {       
1206                                         char c = in.get(); 
1207                                         if (c == 10 || c == 13){        break;  }
1208                                         else { name += c; }     
1209                                 } 
1210                                 m->gobble(in);
1211                         }
1212                         
1213                         while(in){
1214                                 char letter= in.get();
1215                                 if(letter == '>'){      in.putback(letter);     break;  }
1216                                 else{ scores += letter; }
1217                         }
1218                         
1219                         m->gobble(in);
1220                         
1221                         it = badSeqNames.find(saveName);
1222                         
1223                         if(it != badSeqNames.end()){
1224                                 badSeqNames.erase(it);
1225                         }else{                          
1226                                 goodQual << name << endl << scores;
1227                         }
1228                         
1229                         m->gobble(in);
1230                 }
1231                 
1232                 in.close();
1233                 goodQual.close();
1234                 
1235                 //we were unable to remove some of the bad sequences
1236                 if (badSeqNames.size() != 0) {
1237                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1238                                 m->mothurOut("Your qual file does not include the sequence " + *it + " please correct."); 
1239                                 m->mothurOutEndLine();
1240                         }
1241                 }
1242                 
1243                 if (m->control_pressed) {  m->mothurRemove(goodQualFile);  return 0; }
1244                 
1245                 return 0;
1246                 
1247         }
1248         catch(exception& e) {
1249                 m->errorOut(e, "ScreenSeqsCommand", "screenQual");
1250                 exit(1);
1251         }
1252         
1253 }
1254 //**********************************************************************************************************************
1255
1256 int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
1257         try {
1258                 ofstream goodFile;
1259                 m->openOutputFile(goodFName, goodFile);
1260                 
1261                 ofstream badAccnosFile;
1262                 m->openOutputFile(badAccnosFName, badAccnosFile);
1263                 
1264                 ifstream inFASTA;
1265                 m->openInputFile(filename, inFASTA);
1266
1267                 inFASTA.seekg(filePos.start);
1268
1269                 bool done = false;
1270                 int count = 0;
1271         
1272                 while (!done) {
1273                 
1274                         if (m->control_pressed) {  return 0; }
1275                         
1276                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
1277                         if (currSeq.getName() != "") {
1278                                 bool goodSeq = 1;               //      innocent until proven guilty
1279                                 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
1280                                 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
1281                                 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
1282                                 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
1283                                 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
1284                                 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
1285                                 
1286                                 if(goodSeq == 1){
1287                                         currSeq.printSequence(goodFile);        
1288                                 }
1289                                 else{
1290                                         badAccnosFile << currSeq.getName() << endl;
1291                                         badSeqNames.insert(currSeq.getName());
1292                                 }
1293                         count++;
1294                         }
1295                         
1296                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1297                                 unsigned long long pos = inFASTA.tellg();
1298                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
1299                         #else
1300                                 if (inFASTA.eof()) { break; }
1301                         #endif
1302                         
1303                         //report progress
1304                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1305                 }
1306                 //report progress
1307                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1308                 
1309                         
1310                 goodFile.close();
1311                 inFASTA.close();
1312                 badAccnosFile.close();
1313                 
1314                 return count;
1315         }
1316         catch(exception& e) {
1317                 m->errorOut(e, "ScreenSeqsCommand", "driver");
1318                 exit(1);
1319         }
1320 }
1321 //**********************************************************************************************************************
1322 #ifdef USE_MPI
1323 int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long long>& MPIPos, set<string>& badSeqNames){
1324         try {
1325                 string outputString = "";
1326                 MPI_Status statusGood; 
1327                 MPI_Status statusBadAccnos; 
1328                 MPI_Status status; 
1329                 int pid;
1330                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1331
1332                 for(int i=0;i<num;i++){
1333                 
1334                         if (m->control_pressed) {  return 0; }
1335                         
1336                         //read next sequence
1337                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1338
1339                         char* buf4 = new char[length];
1340
1341                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1342                         
1343                         string tempBuf = buf4;  delete buf4;
1344                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1345                         istringstream iss (tempBuf,istringstream::in);
1346                         
1347                         Sequence currSeq(iss);                  
1348                         
1349                         //process seq
1350                         if (currSeq.getName() != "") {
1351                                 bool goodSeq = 1;               //      innocent until proven guilty
1352                                 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
1353                                 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
1354                                 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
1355                                 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
1356                                 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
1357                                 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
1358                                 
1359                                 if(goodSeq == 1){
1360                                         outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";
1361                                 
1362                                         //print good seq
1363                                         length = outputString.length();
1364                                         char* buf2 = new char[length];
1365                                         memcpy(buf2, outputString.c_str(), length);
1366                                         
1367                                         MPI_File_write_shared(goodFile, buf2, length, MPI_CHAR, &statusGood);
1368                                         delete buf2;
1369                                 }
1370                                 else{
1371
1372                                         badSeqNames.insert(currSeq.getName());
1373                                         
1374                                         //write to bad accnos file
1375                                         outputString = currSeq.getName() + "\n";
1376                                 
1377                                         length = outputString.length();
1378                                         char* buf3 = new char[length];
1379                                         memcpy(buf3, outputString.c_str(), length);
1380                                         
1381                                         MPI_File_write_shared(badAccnosFile, buf3, length, MPI_CHAR, &statusBadAccnos);
1382                                         delete buf3;
1383                                 }
1384                         }
1385                         
1386                         //report progress
1387                         if((i) % 100 == 0){     m->mothurOut("Processing sequence: " + toString(i)); m->mothurOutEndLine();             }
1388                 }
1389                                 
1390                 return 1;
1391         }
1392         catch(exception& e) {
1393                 m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
1394                 exit(1);
1395         }
1396 }
1397 #endif
1398 /**************************************************************************************************/
1399
1400 int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
1401         try {
1402         
1403         vector<int> processIDS;   
1404         int process = 1;
1405                 int num = 0;
1406
1407 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1408                                 
1409                 //loop through and create all the processes you want
1410                 while (process != processors) {
1411                         int pid = fork();
1412                         
1413                         if (pid > 0) {
1414                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1415                                 process++;
1416                         }else if (pid == 0){
1417                                 num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
1418                                 
1419                                 //pass numSeqs to parent
1420                                 ofstream out;
1421                                 string tempFile = filename + toString(getpid()) + ".num.temp";
1422                                 m->openOutputFile(tempFile, out);
1423                                 out << num << endl;
1424                                 out.close();
1425                                 
1426                                 exit(0);
1427                         }else { 
1428                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1429                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1430                                 exit(0);
1431                         }
1432                 }
1433                 
1434         num = driver(lines[0], goodFileName, badAccnos, filename, badSeqNames);
1435         
1436                 //force parent to wait until all the processes are done
1437                 for (int i=0;i<processIDS.size();i++) { 
1438                         int temp = processIDS[i];
1439                         wait(&temp);
1440                 }
1441                 
1442                 for (int i = 0; i < processIDS.size(); i++) {
1443                         ifstream in;
1444                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1445                         m->openInputFile(tempFile, in);
1446                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1447                         in.close(); m->mothurRemove(tempFile);
1448             
1449             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
1450             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
1451                         
1452             m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
1453             m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
1454                 }
1455                 
1456         //read badSeqs in because root process doesnt know what other "bad" seqs the children found
1457         ifstream inBad;
1458         int ableToOpen = m->openInputFile(badAccnos, inBad, "no error");
1459         
1460         if (ableToOpen == 0) {
1461             badSeqNames.clear();
1462             string tempName;
1463             while (!inBad.eof()) {
1464                 inBad >> tempName; m->gobble(inBad);
1465                 badSeqNames.insert(tempName);
1466             }
1467             inBad.close();
1468         }
1469 #else
1470         
1471         //////////////////////////////////////////////////////////////////////////////////////////////////////
1472                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
1473                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1474                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
1475                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1476                 
1477                 vector<sumScreenData*> pDataArray; 
1478                 DWORD   dwThreadIdArray[processors-1];
1479                 HANDLE  hThreadArray[processors-1]; 
1480                 
1481                 //Create processor worker threads.
1482                 for( int i=0; i<processors-1; i++ ){
1483             
1484             string extension = "";
1485             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
1486             
1487                         // Allocate memory for thread data.
1488                         sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
1489                         pDataArray.push_back(tempSum);
1490                         
1491                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1492                         hThreadArray[i] = CreateThread(NULL, 0, MySumScreenThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1493                 }
1494                 
1495         //do your part
1496         num = driver(lines[processors-1], (goodFileName+toString(processors-1)+".temp"), (badAccnos+toString(processors-1)+".temp"), filename, badSeqNames);
1497         processIDS.push_back(processors-1);
1498         
1499                 //Wait until all threads have terminated.
1500                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1501                 
1502                 //Close all thread handles and free memory allocations.
1503                 for(int i=0; i < pDataArray.size(); i++){
1504                         num += pDataArray[i]->count;
1505             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
1506                         CloseHandle(hThreadArray[i]);
1507                         delete pDataArray[i];
1508                 }
1509         
1510         for (int i = 0; i < processIDS.size(); i++) {
1511             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
1512             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
1513                         
1514             m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
1515             m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
1516                 }
1517
1518 #endif  
1519         
1520         return num;
1521         
1522         }
1523         catch(exception& e) {
1524                 m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
1525                 exit(1);
1526         }
1527 }
1528
1529 //***************************************************************************************************************
1530
1531