git.donarmstrong.com Git - mothur.git/blob - screenseqscommand.cpp
added checks to make sure windows processes completed their tasks.
[mothur.git] / screenseqscommand.cpp
1 /*
2  *  screenseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Pat Schloss on 6/3/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "screenseqscommand.h"
11 #include "counttable.h"
12
13 //**********************************************************************************************************************
14 vector<string> ScreenSeqsCommand::setParameters(){      
15         try {
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false,true); parameters.push_back(pgroup);
20                 CommandParameter pqfile("qfile", "InputTypes", "", "", "none", "none", "none","qfile",false,false); parameters.push_back(pqfile);
21                 CommandParameter palignreport("alignreport", "InputTypes", "", "", "none", "none", "none","alignreport",false,false); parameters.push_back(palignreport);
22                 CommandParameter ptax("taxonomy", "InputTypes", "", "", "none", "none", "none","taxonomy",false,false); parameters.push_back(ptax);
23                 CommandParameter pstart("start", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pstart);
24                 CommandParameter pend("end", "Number", "", "-1", "", "", "","",false,false,true); parameters.push_back(pend);
25                 CommandParameter pmaxambig("maxambig", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxambig);
26                 CommandParameter pmaxhomop("maxhomop", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxhomop);
27                 CommandParameter pminlength("minlength", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pminlength);
28                 CommandParameter pmaxlength("maxlength", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(pmaxlength);
29                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
30                 CommandParameter pcriteria("criteria", "Number", "", "90", "", "", "","",false,false); parameters.push_back(pcriteria);
31                 CommandParameter poptimize("optimize", "Multiple", "none-start-end-maxambig-maxhomop-minlength-maxlength", "none", "", "", "","",true,false); parameters.push_back(poptimize);
32                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
33                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
34                 
35                 vector<string> myArray;
36                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
37                 return myArray;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "ScreenSeqsCommand", "setParameters");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 string ScreenSeqsCommand::getHelpString(){      
46         try {
47                 string helpString = "";
48                 helpString += "The screen.seqs command reads a fastafile and screens sequences.\n";
49                 helpString += "The screen.seqs command parameters are fasta, start, end, maxambig, maxhomop, minlength, maxlength, name, group, count, qfile, alignreport, taxonomy, optimize, criteria and processors.\n";
50                 helpString += "The fasta parameter is required.\n";
51                 helpString += "The alignreport and taxonomy parameters allow you to remove bad seqs from taxonomy and alignreport files.\n";
52                 helpString += "The start parameter is used to set a position the \"good\" sequences must start by. The default is -1.\n";
53                 helpString += "The end parameter is used to set a position the \"good\" sequences must end after. The default is -1.\n";
54                 helpString += "The maxambig parameter allows you to set the maximum number of ambigious bases allowed. The default is -1.\n";
55                 helpString += "The maxhomop parameter allows you to set a maximum homopolymer length. \n";
56                 helpString += "The minlength parameter allows you to set and minimum sequence length. \n";
57                 helpString += "The maxlength parameter allows you to set and maximum sequence length. \n";
58                 helpString += "The processors parameter allows you to specify the number of processors to use while running the command. The default is 1.\n";
59                 helpString += "The optimize and criteria parameters allow you set the start, end, maxabig, maxhomop, minlength and maxlength parameters relative to your set of sequences .\n";
60                 helpString += "For example optimize=start-end, criteria=90, would set the start and end values to the position 90% of your sequences started and ended.\n";
61                 helpString += "The name parameter allows you to provide a namesfile, and the group parameter allows you to provide a groupfile.\n";
62                 helpString += "The screen.seqs command should be in the following format: \n";
63                 helpString += "screen.seqs(fasta=yourFastaFile, name=youNameFile, group=yourGroupFIle, start=yourStart, end=yourEnd, maxambig=yourMaxambig,  \n";
64                 helpString += "maxhomop=yourMaxhomop, minlength=youMinlength, maxlength=yourMaxlength)  \n";    
65                 helpString += "Example screen.seqs(fasta=abrecovery.fasta, name=abrecovery.names, group=abrecovery.groups, start=..., end=..., maxambig=..., maxhomop=..., minlength=..., maxlength=...).\n";
66                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
67                 return helpString;
68         }
69         catch(exception& e) {
70                 m->errorOut(e, "ScreenSeqsCommand", "getHelpString");
71                 exit(1);
72         }
73 }
74 //**********************************************************************************************************************
75 string ScreenSeqsCommand::getOutputPattern(string type) {
76     try {
77         string pattern = "";
78         
79         if (type == "fasta")            {   pattern = "[filename],good,[extension]";    }
80         else if (type == "taxonomy")    {   pattern = "[filename],good,[extension]";    }
81         else if (type == "name")        {   pattern = "[filename],good,[extension]";    }
82         else if (type == "group")       {   pattern = "[filename],good,[extension]";    }
83         else if (type == "count")       {   pattern = "[filename],good,[extension]";    }
84         else if (type == "accnos")      {   pattern = "[filename],bad.accnos";          }
85         else if (type == "qfile")       {   pattern = "[filename],good,[extension]";    }
86         else if (type == "alignreport")      {   pattern = "[filename],good.align.report";    }
87         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
88         
89         return pattern;
90     }
91     catch(exception& e) {
92         m->errorOut(e, "ScreenSeqsCommand", "getOutputPattern");
93         exit(1);
94     }
95 }
96 //**********************************************************************************************************************
97 ScreenSeqsCommand::ScreenSeqsCommand(){ 
98         try {
99                 abort = true; calledHelp = true; 
100                 setParameters();
101                 vector<string> tempOutNames;
102                 outputTypes["fasta"] = tempOutNames;
103                 outputTypes["name"] = tempOutNames;
104                 outputTypes["group"] = tempOutNames;
105                 outputTypes["alignreport"] = tempOutNames;
106                 outputTypes["accnos"] = tempOutNames;
107                 outputTypes["qfile"] = tempOutNames;
108                 outputTypes["taxonomy"] = tempOutNames;
109         outputTypes["count"] = tempOutNames;
110         }
111         catch(exception& e) {
112                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
113                 exit(1);
114         }
115 }
116 //***************************************************************************************************************
117
118 ScreenSeqsCommand::ScreenSeqsCommand(string option)  {
119         try {
120                 abort = false; calledHelp = false;   
121                 
122                 //allow user to run help
123                 if(option == "help") { help(); abort = true; calledHelp = true; }
124                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
125                 
126                 else {
127                         vector<string> myArray = setParameters();
128                         
129                         OptionParser parser(option);
130                         map<string,string> parameters = parser.getParameters();
131                         
132                         ValidParameters validParameter("screen.seqs");
133                         map<string,string>::iterator it;
134                         
135                         //check to make sure all parameters are valid for command
136                         for (it = parameters.begin(); it != parameters.end(); it++) { 
137                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
138                         }
139                         
140                         //initialize outputTypes
141                         vector<string> tempOutNames;
142                         outputTypes["fasta"] = tempOutNames;
143                         outputTypes["name"] = tempOutNames;
144                         outputTypes["group"] = tempOutNames;
145                         outputTypes["alignreport"] = tempOutNames;
146                         outputTypes["accnos"] = tempOutNames;
147                         outputTypes["qfile"] = tempOutNames;
148                         outputTypes["taxonomy"] = tempOutNames;
149             outputTypes["count"] = tempOutNames;
150                         
151                         //if the user changes the input directory command factory will send this info to us in the output parameter 
152                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
153                         if (inputDir == "not found"){   inputDir = "";          }
154                         else {
155                                 string path;
156                                 it = parameters.find("fasta");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
162                                 }
163                                 
164                                 it = parameters.find("group");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
170                                 }
171                                 
172                                 it = parameters.find("name");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
178                                 }
179                                 
180                                 it = parameters.find("alignreport");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["alignreport"] = inputDir + it->second;              }
186                                 }
187                                 
188                                 it = parameters.find("qfile");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["qfile"] = inputDir + it->second;            }
194                                 }
195                                 
196                                 it = parameters.find("taxonomy");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
202                                 }
203                 
204                 it = parameters.find("count");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
210                                 }
211                         }
212
213                         //check for required parameters
214                         fastafile = validParameter.validFile(parameters, "fasta", true);
215                         if (fastafile == "not found") {                         
216                                 fastafile = m->getFastaFile(); 
217                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
218                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
219                         }
220                         else if (fastafile == "not open") { abort = true; }
221                         else { m->setFastaFile(fastafile); }
222         
223                         groupfile = validParameter.validFile(parameters, "group", true);
224                         if (groupfile == "not open") { abort = true; }  
225                         else if (groupfile == "not found") { groupfile = ""; }
226                         else { m->setGroupFile(groupfile); }
227                         
228                         qualfile = validParameter.validFile(parameters, "qfile", true);
229                         if (qualfile == "not open") { abort = true; }   
230                         else if (qualfile == "not found") { qualfile = ""; }
231                         else { m->setQualFile(qualfile); }
232                         
233                         namefile = validParameter.validFile(parameters, "name", true);
234                         if (namefile == "not open") { namefile = ""; abort = true; }
235                         else if (namefile == "not found") { namefile = ""; }    
236                         else { m->setNameFile(namefile); }
237                         
238             countfile = validParameter.validFile(parameters, "count", true);
239                         if (countfile == "not open") { countfile = ""; abort = true; }
240                         else if (countfile == "not found") { countfile = "";  } 
241                         else { m->setCountTableFile(countfile); }
242             
243             if ((namefile != "") && (countfile != "")) {
244                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
245             }
246                         
247             if ((groupfile != "") && (countfile != "")) {
248                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
249             }
250             
251                         alignreport = validParameter.validFile(parameters, "alignreport", true);
252                         if (alignreport == "not open") { abort = true; }
253                         else if (alignreport == "not found") { alignreport = ""; }
254                         
255                         taxonomy = validParameter.validFile(parameters, "taxonomy", true);
256                         if (taxonomy == "not open") { abort = true; }
257                         else if (taxonomy == "not found") { taxonomy = ""; }    
258                         
259                         //if the user changes the output directory command factory will send this info to us in the output parameter 
260                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
261                                 outputDir = ""; 
262                                 outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it       
263                         }
264
265                         //check for optional parameter and set defaults
266                         // ...at some point should added some additional type checking...
267                         string temp;
268                         temp = validParameter.validFile(parameters, "start", false);            if (temp == "not found") { temp = "-1"; }
269                         m->mothurConvert(temp, startPos); 
270                 
271                         temp = validParameter.validFile(parameters, "end", false);                      if (temp == "not found") { temp = "-1"; }
272                         m->mothurConvert(temp, endPos);  
273
274                         temp = validParameter.validFile(parameters, "maxambig", false);         if (temp == "not found") { temp = "-1"; }
275                         m->mothurConvert(temp, maxAmbig);  
276
277                         temp = validParameter.validFile(parameters, "maxhomop", false);         if (temp == "not found") { temp = "-1"; }
278                         m->mothurConvert(temp, maxHomoP);  
279
280                         temp = validParameter.validFile(parameters, "minlength", false);        if (temp == "not found") { temp = "-1"; }
281                         m->mothurConvert(temp, minLength); 
282                         
283                         temp = validParameter.validFile(parameters, "maxlength", false);        if (temp == "not found") { temp = "-1"; }
284                         m->mothurConvert(temp, maxLength); 
285                         
286                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
287                         m->setProcessors(temp);
288                         m->mothurConvert(temp, processors);
289                         
290                         temp = validParameter.validFile(parameters, "optimize", false); //optimizing trumps the optimized values original value
291                         if (temp == "not found"){       temp = "none";          }
292                         m->splitAtDash(temp, optimize);         
293                         
294                         //check for invalid optimize options
295                         set<string> validOptimizers;
296                         validOptimizers.insert("none"); validOptimizers.insert("start"); validOptimizers.insert("end"); validOptimizers.insert("maxambig"); validOptimizers.insert("maxhomop"); validOptimizers.insert("minlength"); validOptimizers.insert("maxlength");
297                         for (int i = 0; i < optimize.size(); i++) { 
298                                 if (validOptimizers.count(optimize[i]) == 0) { 
299                                         m->mothurOut(optimize[i] + " is not a valid optimizer. Valid options are start, end, maxambig, maxhomop, minlength and maxlength."); m->mothurOutEndLine();
300                                         optimize.erase(optimize.begin()+i);
301                                         i--;
302                                 }
303                         }
304                         
305                         if (optimize.size() == 1) { if (optimize[0] == "none") { optimize.clear(); } }
306                         
307                         temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){       temp = "90";                            }
308                         m->mothurConvert(temp, criteria); 
309                         
310                         if (countfile == "") { 
311                 if (namefile == "") {
312                     vector<string> files; files.push_back(fastafile);
313                     parser.getNameFile(files);
314                 }
315             }
316                 }
317
318         }
319         catch(exception& e) {
320                 m->errorOut(e, "ScreenSeqsCommand", "ScreenSeqsCommand");
321                 exit(1);
322         }
323 }
324 //***************************************************************************************************************
325
326 int ScreenSeqsCommand::execute(){
327         try{
328                 
329                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
330                 
331                 //if the user want to optimize we need to know the 90% mark
332                 vector<unsigned long long> positions;
333                 if (optimize.size() != 0) {  //get summary is paralellized so we need to divideFile, no need to do this step twice so I moved it here
334                         //use the namefile to optimize correctly
335                         if (namefile != "") { nameMap = m->readNames(namefile); }
336             else if (countfile != "") {
337                 CountTable ct;
338                 ct.readTable(countfile);
339                 nameMap = ct.getNameMap();
340             }
341                         getSummary(positions); 
342                 } 
343                 else { 
344                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
345                 positions = m->divideFile(fastafile, processors);
346                 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }
347                         #else 
348                 if(processors == 1){ lines.push_back(linePair(0, 1000));  }
349                 else {
350                     int numFastaSeqs = 0;
351                     positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
352                     if (positions.size() < processors) { processors = positions.size(); }
353                 
354                     //figure out how many sequences you have to process
355                     int numSeqsPerProcessor = numFastaSeqs / processors;
356                     for (int i = 0; i < processors; i++) {
357                         int startIndex =  i * numSeqsPerProcessor;
358                         if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
359                         lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
360                     }
361                 }
362                         #endif
363                 }
364         
365         map<string, string> variables; 
366         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
367         string badAccnosFile =  getOutputFileName("accnos",variables);
368         variables["[extension]"] = m->getExtension(fastafile);
369                 string goodSeqFile = getOutputFileName("fasta", variables);
370                 
371                 
372                 int numFastaSeqs = 0;
373                 set<string> badSeqNames;
374                 int start = time(NULL);
375         
376 #ifdef USE_MPI  
377                         int pid, numSeqsPerProcessor; 
378                         int tag = 2001;
379                         vector<unsigned long long> MPIPos;
380                         
381                         MPI_Status status; 
382                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
383                         MPI_Comm_size(MPI_COMM_WORLD, &processors); 
384         
385                         MPI_File inMPI;
386                         MPI_File outMPIGood;
387                         MPI_File outMPIBadAccnos;
388                         
389                         int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
390                         int inMode=MPI_MODE_RDONLY; 
391                         
392                         char outGoodFilename[1024];
393                         strcpy(outGoodFilename, goodSeqFile.c_str());
394
395                         char outBadAccnosFilename[1024];
396                         strcpy(outBadAccnosFilename, badAccnosFile.c_str());
397
398                         char inFileName[1024];
399                         strcpy(inFileName, fastafile.c_str());
400                         
401                         MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
402                         MPI_File_open(MPI_COMM_WORLD, outGoodFilename, outMode, MPI_INFO_NULL, &outMPIGood);
403                         MPI_File_open(MPI_COMM_WORLD, outBadAccnosFilename, outMode, MPI_INFO_NULL, &outMPIBadAccnos);
404                         
405                         if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood); MPI_File_close(&outMPIBadAccnos); return 0; }
406                         
407                         if (pid == 0) { //you are the root process 
408                                 
409                                 MPIPos = m->setFilePosFasta(fastafile, numFastaSeqs); //fills MPIPos, returns numSeqs
410                                 
411                                 //send file positions to all processes
412                                 for(int i = 1; i < processors; i++) { 
413                                         MPI_Send(&numFastaSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
414                                         MPI_Send(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
415                                 }
416                                 
417                                 //figure out how many sequences you have to align
418                                 numSeqsPerProcessor = numFastaSeqs / processors;
419                                 int startIndex =  pid * numSeqsPerProcessor;
420                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
421
422                                 //align your part
423                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
424
425                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos);  return 0; }
426
427                                 for (int i = 1; i < processors; i++) {
428                                         //get bad lists
429                                         int badSize;
430                                         MPI_Recv(&badSize, 1, MPI_INT, i, tag, MPI_COMM_WORLD, &status);
431                                 }
432                         }else{ //you are a child process
433                                 MPI_Recv(&numFastaSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
434                                 MPIPos.resize(numFastaSeqs+1);
435                                 MPI_Recv(&MPIPos[0], (numFastaSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
436
437                                 //figure out how many sequences you have to align
438                                 numSeqsPerProcessor = numFastaSeqs / processors;
439                                 int startIndex =  pid * numSeqsPerProcessor;
440                                 if(pid == (processors - 1)){    numSeqsPerProcessor = numFastaSeqs - pid * numSeqsPerProcessor;         }
441
442                                 //align your part
443                                 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPIGood, outMPIBadAccnos, MPIPos, badSeqNames);
444
445                                 if (m->control_pressed) { MPI_File_close(&inMPI);  MPI_File_close(&outMPIGood);  MPI_File_close(&outMPIBadAccnos); return 0; }
446                                 
447                                 //send bad list 
448                                 int badSize = badSeqNames.size();
449                                 MPI_Send(&badSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
450                         }
451                         
452                         //close files 
453                         MPI_File_close(&inMPI);
454                         MPI_File_close(&outMPIGood);
455                         MPI_File_close(&outMPIBadAccnos);
456                         MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
457                                         
458 #else
459         if(processors == 1){ numFastaSeqs = driver(lines[0], goodSeqFile, badAccnosFile, fastafile, badSeqNames);       }       
460         else{ numFastaSeqs = createProcesses(goodSeqFile, badAccnosFile, fastafile, badSeqNames); }
461         
462         if (m->control_pressed) { m->mothurRemove(goodSeqFile); return 0; }
463 #endif          
464
465                 #ifdef USE_MPI
466                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
467                                         
468                         if (pid == 0) { //only one process should fix files
469                         
470                                 //read accnos file with all names in it, process 0 just has its names
471                                 MPI_File inMPIAccnos;
472                                 MPI_Offset size;
473                         
474                                 char inFileName[1024];
475                                 strcpy(inFileName, badAccnosFile.c_str());
476                         
477                                 MPI_File_open(MPI_COMM_SELF, inFileName, inMode, MPI_INFO_NULL, &inMPIAccnos);  //comm, filename, mode, info, filepointer
478                                 MPI_File_get_size(inMPIAccnos, &size);
479                         
480                                 char* buffer = new char[size];
481                                 MPI_File_read(inMPIAccnos, buffer, size, MPI_CHAR, &status);
482                         
483                                 string tempBuf = buffer;
484                                 if (tempBuf.length() > size) { tempBuf = tempBuf.substr(0, size);  }
485                                 istringstream iss (tempBuf,istringstream::in);
486
487                                 delete buffer;
488                                 MPI_File_close(&inMPIAccnos);
489                                 
490                                 badSeqNames.clear();
491                                 string tempName;
492                                 while (!iss.eof()) {
493                                         iss >> tempName; m->gobble(iss);
494                                         badSeqNames.insert(tempName);
495                                 }
496                 #endif
497                                                                                                                                                                         
498                 if(namefile != "" && groupfile != "")   {       
499                         screenNameGroupFile(badSeqNames);       
500                         if (m->control_pressed) {  m->mothurRemove(goodSeqFile); return 0; }
501                 }else if(namefile != "")        {       
502                         screenNameGroupFile(badSeqNames);
503                         if (m->control_pressed) {  m->mothurRemove(goodSeqFile);  return 0; }   
504                 }else if(groupfile != "")                               {       screenGroupFile(badSeqNames);           }       // this screens just the group
505                 else if (countfile != "") {     screenCountFile(badSeqNames);           }
506             
507                 
508                 if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
509
510                 if(alignreport != "")                                   {       screenAlignReport(badSeqNames);         }
511                 if(qualfile != "")                                              {       screenQual(badSeqNames);                        }
512                 if(taxonomy != "")                                              {       screenTaxonomy(badSeqNames);            }
513                 
514                 if (m->control_pressed) { m->mothurRemove(goodSeqFile);  return 0; }
515                 
516                 #ifdef USE_MPI
517                         }
518                 #endif
519
520                 m->mothurOutEndLine();
521                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
522                 m->mothurOut(goodSeqFile); m->mothurOutEndLine();       outputTypes["fasta"].push_back(goodSeqFile);
523                 m->mothurOut(badAccnosFile); m->mothurOutEndLine();      outputTypes["accnos"].push_back(badAccnosFile);
524                 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
525                 m->mothurOutEndLine();
526                 m->mothurOutEndLine();
527                 
528                 //set fasta file as new current fastafile
529                 string current = "";
530                 itTypes = outputTypes.find("fasta");
531                 if (itTypes != outputTypes.end()) {
532                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
533                 }
534                 
535                 itTypes = outputTypes.find("name");
536                 if (itTypes != outputTypes.end()) {
537                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
538                 }
539                 
540                 itTypes = outputTypes.find("group");
541                 if (itTypes != outputTypes.end()) {
542                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
543                 }
544                 
545                 itTypes = outputTypes.find("qfile");
546                 if (itTypes != outputTypes.end()) {
547                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setQualFile(current); }
548                 }
549                 
550                 itTypes = outputTypes.find("taxonomy");
551                 if (itTypes != outputTypes.end()) {
552                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
553                 }
554         
555         itTypes = outputTypes.find("count");
556                 if (itTypes != outputTypes.end()) {
557                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
558                 }
559
560                 m->mothurOut("It took " + toString(time(NULL) - start) + " secs to screen " + toString(numFastaSeqs) + " sequences.");
561                 m->mothurOutEndLine();
562
563                 return 0;
564         }
565         catch(exception& e) {
566                 m->errorOut(e, "ScreenSeqsCommand", "execute");
567                 exit(1);
568         }
569 }
570
571 //***************************************************************************************************************
572
573 int ScreenSeqsCommand::screenNameGroupFile(set<string> badSeqNames){
574         try {
575                 ifstream inputNames;
576                 m->openInputFile(namefile, inputNames);
577                 set<string> badSeqGroups;
578                 string seqName, seqList, group;
579                 set<string>::iterator it;
580         map<string, string> variables; 
581                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
582         variables["[extension]"] = m->getExtension(namefile);
583                 string goodNameFile = getOutputFileName("name", variables);
584                 outputNames.push_back(goodNameFile);  outputTypes["name"].push_back(goodNameFile);
585                 
586                 ofstream goodNameOut;   m->openOutputFile(goodNameFile, goodNameOut);
587                 
588                 while(!inputNames.eof()){
589                         if (m->control_pressed) { goodNameOut.close();  inputNames.close(); m->mothurRemove(goodNameFile);  return 0; }
590
591                         inputNames >> seqName; m->gobble(inputNames); inputNames >> seqList;
592                         it = badSeqNames.find(seqName);
593                                 
594                         if(it != badSeqNames.end()){
595                                 badSeqNames.erase(it);
596                                 
597                                 if(namefile != ""){
598                                         int start = 0;
599                                         for(int i=0;i<seqList.length();i++){
600                                                 if(seqList[i] == ','){
601                                                         badSeqGroups.insert(seqList.substr(start,i-start));
602                                                         start = i+1;
603                                                 }                                       
604                                         }
605                                         badSeqGroups.insert(seqList.substr(start,seqList.length()-start));
606                                 }
607                         }
608                         else{
609                                 goodNameOut << seqName << '\t' << seqList << endl;
610                         }
611                         m->gobble(inputNames);
612                 }
613                 inputNames.close();
614                 goodNameOut.close();
615         
616                 //we were unable to remove some of the bad sequences
617                 if (badSeqNames.size() != 0) {
618                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
619                                 m->mothurOut("Your namefile does not include the sequence " + *it + " please correct."); 
620                                 m->mothurOutEndLine();
621                         }
622                 }
623
624                 if(groupfile != ""){
625                         
626                         ifstream inputGroups;
627                         m->openInputFile(groupfile, inputGroups);
628             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
629             variables["[extension]"] = m->getExtension(groupfile);
630             string goodGroupFile = getOutputFileName("group", variables);
631                         
632                         outputNames.push_back(goodGroupFile);   outputTypes["group"].push_back(goodGroupFile);
633                         
634                         ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
635                         
636                         while(!inputGroups.eof()){
637                                 if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodNameFile);  m->mothurRemove(goodGroupFile); return 0; }
638
639                                 inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
640                                 
641                                 it = badSeqGroups.find(seqName);
642                                 
643                                 if(it != badSeqGroups.end()){
644                                         badSeqGroups.erase(it);
645                                 }
646                                 else{
647                                         goodGroupOut << seqName << '\t' << group << endl;
648                                 }
649                                 m->gobble(inputGroups);
650                         }
651                         inputGroups.close();
652                         goodGroupOut.close();
653                         
654                         //we were unable to remove some of the bad sequences
655                         if (badSeqGroups.size() != 0) {
656                                 for (it = badSeqGroups.begin(); it != badSeqGroups.end(); it++) {  
657                                         m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
658                                         m->mothurOutEndLine();
659                                 }
660                         }
661                 }
662                 
663                 
664                 return 0;
665         
666         }
667         catch(exception& e) {
668                 m->errorOut(e, "ScreenSeqsCommand", "screenNameGroupFile");
669                 exit(1);
670         }
671 }
672 //***************************************************************************************************************
673 int ScreenSeqsCommand::getSummary(vector<unsigned long long>& positions){
674         try {
675                 
676                 vector<int> startPosition;
677                 vector<int> endPosition;
678                 vector<int> seqLength;
679                 vector<int> ambigBases;
680                 vector<int> longHomoPolymer;
681                 
682         vector<unsigned long long> positions;
683 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
684                 positions = m->divideFile(fastafile, processors);
685                 for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); }   
686 #else
687                 if(processors == 1){ lines.push_back(linePair(0, 1000));  }
688         else {
689             int numFastaSeqs = 0;
690             positions = m->setFilePosFasta(fastafile, numFastaSeqs); 
691             if (positions.size() < processors) { processors = positions.size(); }
692             
693             //figure out how many sequences you have to process
694             int numSeqsPerProcessor = numFastaSeqs / processors;
695             for (int i = 0; i < processors; i++) {
696                 int startIndex =  i * numSeqsPerProcessor;
697                 if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
698                 lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor));
699             }
700         }
701 #endif
702                 
703 #ifdef USE_MPI
704                 int pid;
705                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
706                 
707                 if (pid == 0) { 
708                         driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
709 #else
710                 int numSeqs = 0;
711                 //#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
712                         if(processors == 1){
713                                 numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
714                         }else{
715                                 numSeqs = createProcessesCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile); 
716                         }
717                                 
718                         if (m->control_pressed) {  return 0; }
719                 //#else
720                 //      numSeqs = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
721                 //      if (m->control_pressed) {  return 0; }
722                 //#endif
723 #endif
724                 sort(startPosition.begin(), startPosition.end());
725                 sort(endPosition.begin(), endPosition.end());
726                 sort(seqLength.begin(), seqLength.end());
727                 sort(ambigBases.begin(), ambigBases.end());
728                 sort(longHomoPolymer.begin(), longHomoPolymer.end());
729                 
730                 //numSeqs is the number of unique seqs, startPosition.size() is the total number of seqs, we want to optimize using all seqs
731                 int criteriaPercentile  = int(startPosition.size() * (criteria / (float) 100));
732                 
733                 for (int i = 0; i < optimize.size(); i++) {
734                         if (optimize[i] == "start") { startPos = startPosition[criteriaPercentile]; m->mothurOut("Optimizing start to " + toString(startPos) + "."); m->mothurOutEndLine(); }
735                         else if (optimize[i] == "end") { int endcriteriaPercentile = int(endPosition.size() * ((100 - criteria) / (float) 100));  endPos = endPosition[endcriteriaPercentile]; m->mothurOut("Optimizing end to " + toString(endPos) + "."); m->mothurOutEndLine();}
736                         else if (optimize[i] == "maxambig") { maxAmbig = ambigBases[criteriaPercentile]; m->mothurOut("Optimizing maxambig to " + toString(maxAmbig) + "."); m->mothurOutEndLine(); }
737                         else if (optimize[i] == "maxhomop") { maxHomoP = longHomoPolymer[criteriaPercentile]; m->mothurOut("Optimizing maxhomop to " + toString(maxHomoP) + "."); m->mothurOutEndLine(); }
738                         else if (optimize[i] == "minlength") { int mincriteriaPercentile = int(seqLength.size() * ((100 - criteria) / (float) 100)); minLength = seqLength[mincriteriaPercentile]; m->mothurOut("Optimizing minlength to " + toString(minLength) + "."); m->mothurOutEndLine(); }
739                         else if (optimize[i] == "maxlength") { maxLength = seqLength[criteriaPercentile]; m->mothurOut("Optimizing maxlength to " + toString(maxLength) + "."); m->mothurOutEndLine(); }
740                 }
741
742 #ifdef USE_MPI
743                 }
744                 
745                 MPI_Status status; 
746                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
747                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
748                         
749                 if (pid == 0) { 
750                         //send file positions to all processes
751                         for(int i = 1; i < processors; i++) { 
752                                 MPI_Send(&startPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
753                                 MPI_Send(&endPos, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
754                                 MPI_Send(&maxAmbig, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
755                                 MPI_Send(&maxHomoP, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
756                                 MPI_Send(&minLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
757                                 MPI_Send(&maxLength, 1, MPI_INT, i, 2001, MPI_COMM_WORLD);
758                         }
759                 }else {
760                         MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
761                         MPI_Recv(&endPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
762                         MPI_Recv(&maxAmbig, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
763                         MPI_Recv(&maxHomoP, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
764                         MPI_Recv(&minLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
765                         MPI_Recv(&maxLength, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
766                 }
767                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
768 #endif
769                 return 0;
770         }
771         catch(exception& e) {
772                 m->errorOut(e, "ScreenSeqsCommand", "getSummary");
773                 exit(1);
774         }
775 }
776 /**************************************************************************************/
777 int ScreenSeqsCommand::driverCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename, linePair filePos) {    
778         try {
779                 
780                 ifstream in;
781                 m->openInputFile(filename, in);
782                                 
783                 in.seekg(filePos.start);
784
785                 bool done = false;
786                 int count = 0;
787         
788                 while (!done) {
789                                 
790                         if (m->control_pressed) { in.close(); return 1; }
791                                         
792                         Sequence current(in); m->gobble(in);
793         
794                         if (current.getName() != "") {
795                                 int num = 1;
796                                 if (namefile != "") {
797                                         //make sure this sequence is in the namefile, else error 
798                                         map<string, int>::iterator it = nameMap.find(current.getName());
799                                         
800                                         if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
801                                         else { num = it->second; }
802                                 }
803                                 
804                                 //for each sequence this sequence represents
805                                 for (int i = 0; i < num; i++) {
806                                         startPosition.push_back(current.getStartPos());
807                                         endPosition.push_back(current.getEndPos());
808                                         seqLength.push_back(current.getNumBases());
809                                         ambigBases.push_back(current.getAmbigBases());
810                                         longHomoPolymer.push_back(current.getLongHomoPolymer());
811                                 }
812                                 
813                                 count++;
814                         }
815                         //if((count) % 100 == 0){       m->mothurOut("Optimizing sequence: " + toString(count)); m->mothurOutEndLine();         }
816                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
817                                 unsigned long long pos = in.tellg();
818                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
819                         #else
820                                 if (in.eof()) { break; }
821                         #endif
822                         
823                 }
824                 
825                 in.close();
826                 
827                 return count;
828         }
829         catch(exception& e) {
830                 m->errorOut(e, "ScreenSeqsCommand", "driverCreateSummary");
831                 exit(1);
832         }
833 }
834 /**************************************************************************************************/
835 int ScreenSeqsCommand::createProcessesCreateSummary(vector<int>& startPosition, vector<int>& endPosition, vector<int>& seqLength, vector<int>& ambigBases, vector<int>& longHomoPolymer, string filename) {
836         try {
837         
838         int process = 1;
839                 int num = 0;
840                 vector<int> processIDS;
841
842 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
843                                 
844                 //loop through and create all the processes you want
845                 while (process != processors) {
846                         int pid = fork();
847                         
848                         if (pid > 0) {
849                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
850                                 process++;
851                         }else if (pid == 0){
852                                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[process]);
853                                 
854                                 //pass numSeqs to parent
855                                 ofstream out;
856                                 string tempFile = fastafile + toString(getpid()) + ".num.temp";
857                                 m->openOutputFile(tempFile, out);
858                                 
859                                 out << num << endl;
860                                 out << startPosition.size() << endl;
861                                 for (int k = 0; k < startPosition.size(); k++)          {               out << startPosition[k] << '\t'; }  out << endl;
862                                 for (int k = 0; k < endPosition.size(); k++)            {               out << endPosition[k] << '\t'; }  out << endl;
863                                 for (int k = 0; k < seqLength.size(); k++)                      {               out << seqLength[k] << '\t'; }  out << endl;
864                                 for (int k = 0; k < ambigBases.size(); k++)                     {               out << ambigBases[k] << '\t'; }  out << endl;
865                                 for (int k = 0; k < longHomoPolymer.size(); k++)        {               out << longHomoPolymer[k] << '\t'; }  out << endl;
866                                 
867                                 out.close();
868                                 
869                                 exit(0);
870                         }else { 
871                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
872                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
873                                 exit(0);
874                         }
875                 }
876                 
877                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[0]);
878                 
879                 //force parent to wait until all the processes are done
880                 for (int i=0;i<processIDS.size();i++) { 
881                         int temp = processIDS[i];
882                         wait(&temp);
883                 }
884                 
885                 //parent reads in and combine Filter info
886                 for (int i = 0; i < processIDS.size(); i++) {
887                         string tempFilename = fastafile + toString(processIDS[i]) + ".num.temp";
888                         ifstream in;
889                         m->openInputFile(tempFilename, in);
890                         
891                         int temp, tempNum;
892                         in >> tempNum; m->gobble(in); num += tempNum;
893                         in >> tempNum; m->gobble(in);
894                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; startPosition.push_back(temp);              }               m->gobble(in);
895                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; endPosition.push_back(temp);                }               m->gobble(in);
896                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; seqLength.push_back(temp);                  }               m->gobble(in);
897                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; ambigBases.push_back(temp);                 }               m->gobble(in);
898                         for (int k = 0; k < tempNum; k++)                       {               in >> temp; longHomoPolymer.push_back(temp);    }               m->gobble(in);
899                                 
900                         in.close();
901                         m->mothurRemove(tempFilename);
902                 }
903                 
904                 
905 #else 
906         //////////////////////////////////////////////////////////////////////////////////////////////////////
907                 //Windows version shared memory, so be careful when passing variables through the seqSumData struct. 
908                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
909                 //Taking advantage of shared memory to allow both threads to add info to vectors.
910                 //////////////////////////////////////////////////////////////////////////////////////////////////////
911                 
912                 vector<sumData*> pDataArray; 
913                 DWORD   dwThreadIdArray[processors-1];
914                 HANDLE  hThreadArray[processors-1]; 
915                 
916                 //Create processor worker threads.
917                 for( int i=0; i<processors-1; i++ ){
918             
919                         // Allocate memory for thread data.
920                         sumData* tempSum = new sumData(filename, m, lines[i].start, lines[i].end, namefile, nameMap);
921                         pDataArray.push_back(tempSum);
922                         
923                         //MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
924                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
925                         hThreadArray[i] = CreateThread(NULL, 0, MySumThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
926                 }
927                 
928         //do your part
929                 num = driverCreateSummary(startPosition, endPosition, seqLength, ambigBases, longHomoPolymer, fastafile, lines[processors-1]);
930          
931                 //Wait until all threads have terminated.
932                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
933                 
934                 //Close all thread handles and free memory allocations.
935                 for(int i=0; i < pDataArray.size(); i++){
936                         num += pDataArray[i]->count;
937             if (pDataArray[i]->count != pDataArray[i]->end) {
938                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
939             }
940             for (int k = 0; k < pDataArray[i]->startPosition.size(); k++) {     startPosition.push_back(pDataArray[i]->startPosition[k]);       }
941                         for (int k = 0; k < pDataArray[i]->endPosition.size(); k++) {   endPosition.push_back(pDataArray[i]->endPosition[k]);       }
942             for (int k = 0; k < pDataArray[i]->seqLength.size(); k++) { seqLength.push_back(pDataArray[i]->seqLength[k]);       }
943             for (int k = 0; k < pDataArray[i]->ambigBases.size(); k++) {        ambigBases.push_back(pDataArray[i]->ambigBases[k]);       }
944             for (int k = 0; k < pDataArray[i]->longHomoPolymer.size(); k++) {   longHomoPolymer.push_back(pDataArray[i]->longHomoPolymer[k]);       }
945                         CloseHandle(hThreadArray[i]);
946                         delete pDataArray[i];
947                 }
948
949 #endif          
950         return num;
951         }
952         catch(exception& e) {
953                 m->errorOut(e, "ScreenSeqsCommand", "createProcessesCreateSummary");
954                 exit(1);
955         }
956 }
957
958 //***************************************************************************************************************
959
960 int ScreenSeqsCommand::screenGroupFile(set<string> badSeqNames){
961         try {
962                 ifstream inputGroups;
963                 m->openInputFile(groupfile, inputGroups);
964                 string seqName, group;
965                 set<string>::iterator it;
966                 map<string, string> variables;
967                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
968         variables["[extension]"] = m->getExtension(groupfile);
969         string goodGroupFile = getOutputFileName("group", variables);
970         outputNames.push_back(goodGroupFile);  outputTypes["group"].push_back(goodGroupFile);
971                 ofstream goodGroupOut;  m->openOutputFile(goodGroupFile, goodGroupOut);
972                 
973                 while(!inputGroups.eof()){
974                         if (m->control_pressed) { goodGroupOut.close(); inputGroups.close(); m->mothurRemove(goodGroupFile); return 0; }
975
976                         inputGroups >> seqName; m->gobble(inputGroups); inputGroups >> group;
977                         it = badSeqNames.find(seqName);
978                         
979                         if(it != badSeqNames.end()){
980                                 badSeqNames.erase(it);
981                         }
982                         else{
983                                 goodGroupOut << seqName << '\t' << group << endl;
984                         }
985                         m->gobble(inputGroups);
986                 }
987                 
988                 if (m->control_pressed) { goodGroupOut.close();  inputGroups.close(); m->mothurRemove(goodGroupFile);  return 0; }
989
990                 //we were unable to remove some of the bad sequences
991                 if (badSeqNames.size() != 0) {
992                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
993                                 m->mothurOut("Your groupfile does not include the sequence " + *it + " please correct."); 
994                                 m->mothurOutEndLine();
995                         }
996                 }
997                 
998                 inputGroups.close();
999                 goodGroupOut.close();
1000                 
1001                 if (m->control_pressed) { m->mothurRemove(goodGroupFile);   }
1002                 
1003                 return 0;
1004         
1005         }
1006         catch(exception& e) {
1007                 m->errorOut(e, "ScreenSeqsCommand", "screenGroupFile");
1008                 exit(1);
1009         }
1010 }
1011 //***************************************************************************************************************
1012 int ScreenSeqsCommand::screenCountFile(set<string> badSeqNames){
1013         try {
1014                 ifstream in;
1015                 m->openInputFile(countfile, in);
1016                 set<string>::iterator it;
1017                 map<string, string> variables;
1018                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
1019         variables["[extension]"] = m->getExtension(countfile);
1020         string goodCountFile = getOutputFileName("count", variables);
1021                 
1022         outputNames.push_back(goodCountFile);  outputTypes["count"].push_back(goodCountFile);
1023                 ofstream goodCountOut;  m->openOutputFile(goodCountFile, goodCountOut);
1024                 
1025         string headers = m->getline(in); m->gobble(in);
1026         goodCountOut << headers << endl;
1027         
1028         string name, rest; int thisTotal;
1029         while (!in.eof()) {
1030
1031                         if (m->control_pressed) { goodCountOut.close(); in.close(); m->mothurRemove(goodCountFile); return 0; }
1032             
1033                         in >> name; m->gobble(in); 
1034             in >> thisTotal; m->gobble(in);
1035             rest = m->getline(in); m->gobble(in);
1036             
1037                         it = badSeqNames.find(name);
1038                         
1039                         if(it != badSeqNames.end()){
1040                                 badSeqNames.erase(it);
1041                         }
1042                         else{
1043                                 goodCountOut << name << '\t' << thisTotal << '\t' << rest << endl;
1044                         }
1045                 }
1046                 
1047                 if (m->control_pressed) { goodCountOut.close();  in.close(); m->mothurRemove(goodCountFile);  return 0; }
1048         
1049                 //we were unable to remove some of the bad sequences
1050                 if (badSeqNames.size() != 0) {
1051                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1052                                 m->mothurOut("Your count file does not include the sequence " + *it + " please correct."); 
1053                                 m->mothurOutEndLine();
1054                         }
1055                 }
1056                 
1057                 in.close();
1058                 goodCountOut.close();
1059         
1060         //check for groups that have been eliminated
1061         CountTable ct;
1062         if (ct.testGroups(goodCountFile)) {
1063             ct.readTable(goodCountFile);
1064             ct.printTable(goodCountFile);
1065         }
1066                 
1067                 if (m->control_pressed) { m->mothurRemove(goodCountFile);   }
1068                 
1069                 return 0;
1070         
1071         }
1072         catch(exception& e) {
1073                 m->errorOut(e, "ScreenSeqsCommand", "screenCountFile");
1074                 exit(1);
1075         }
1076 }
1077 //***************************************************************************************************************
1078
1079 int ScreenSeqsCommand::screenAlignReport(set<string> badSeqNames){
1080         try {
1081                 ifstream inputAlignReport;
1082                 m->openInputFile(alignreport, inputAlignReport);
1083                 string seqName, group;
1084                 set<string>::iterator it;
1085                 
1086         map<string, string> variables;
1087                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(alignreport));
1088         string goodAlignReportFile = getOutputFileName("alignreport", variables);
1089                 
1090                 outputNames.push_back(goodAlignReportFile);  outputTypes["alignreport"].push_back(goodAlignReportFile);
1091                 ofstream goodAlignReportOut;    m->openOutputFile(goodAlignReportFile, goodAlignReportOut);
1092
1093                 while (!inputAlignReport.eof()) {               //      need to copy header
1094                         char c = inputAlignReport.get();
1095                         goodAlignReportOut << c;
1096                         if (c == 10 || c == 13){        break;  }       
1097                 }
1098
1099                 while(!inputAlignReport.eof()){
1100                         if (m->control_pressed) { goodAlignReportOut.close(); inputAlignReport.close(); m->mothurRemove(goodAlignReportFile); return 0; }
1101
1102                         inputAlignReport >> seqName;
1103                         it = badSeqNames.find(seqName);
1104                         string line;            
1105                         while (!inputAlignReport.eof()) {               //      need to copy header
1106                                 char c = inputAlignReport.get();
1107                                 line += c;
1108                                 if (c == 10 || c == 13){        break;  }       
1109                         }
1110                         
1111                         if(it != badSeqNames.end()){
1112                                 badSeqNames.erase(it);
1113                         }
1114                         else{
1115                                 goodAlignReportOut << seqName << '\t' << line;
1116                         }
1117                         m->gobble(inputAlignReport);
1118                 }
1119                 
1120                 if (m->control_pressed) { goodAlignReportOut.close();  inputAlignReport.close(); m->mothurRemove(goodAlignReportFile);  return 0; }
1121
1122                 //we were unable to remove some of the bad sequences
1123                 if (badSeqNames.size() != 0) {
1124                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1125                                 m->mothurOut("Your alignreport file does not include the sequence " + *it + " please correct."); 
1126                                 m->mothurOutEndLine();
1127                         }
1128                 }
1129
1130                 inputAlignReport.close();
1131                 goodAlignReportOut.close();
1132                                 
1133                 if (m->control_pressed) {  m->mothurRemove(goodAlignReportFile);  return 0; }
1134                 
1135                 return 0;
1136         
1137         }
1138         catch(exception& e) {
1139                 m->errorOut(e, "ScreenSeqsCommand", "screenAlignReport");
1140                 exit(1);
1141         }
1142         
1143 }
1144 //***************************************************************************************************************
1145
1146 int ScreenSeqsCommand::screenTaxonomy(set<string> badSeqNames){
1147         try {
1148                 ifstream input;
1149                 m->openInputFile(taxonomy, input);
1150                 string seqName, tax;
1151                 set<string>::iterator it;
1152         map<string, string> variables;
1153                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(taxonomy));
1154         variables["[extension]"] = m->getExtension(taxonomy);
1155         string goodTaxFile = getOutputFileName("taxonomy", variables);
1156
1157                 outputNames.push_back(goodTaxFile);  outputTypes["taxonomy"].push_back(goodTaxFile);
1158                 ofstream goodTaxOut;    m->openOutputFile(goodTaxFile, goodTaxOut);
1159                                 
1160                 while(!input.eof()){
1161                         if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
1162                         
1163                         input >> seqName; m->gobble(input); input >> tax;
1164                         it = badSeqNames.find(seqName);
1165                         
1166                         if(it != badSeqNames.end()){ badSeqNames.erase(it); }
1167                         else{
1168                                 goodTaxOut << seqName << '\t' << tax << endl;
1169                         }
1170                         m->gobble(input);
1171                 }
1172                 
1173                 if (m->control_pressed) { goodTaxOut.close(); input.close(); m->mothurRemove(goodTaxFile); return 0; }
1174                 
1175                 //we were unable to remove some of the bad sequences
1176                 if (badSeqNames.size() != 0) {
1177                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1178                                 m->mothurOut("Your taxonomy file does not include the sequence " + *it + " please correct."); 
1179                                 m->mothurOutEndLine();
1180                         }
1181                 }
1182                 
1183                 input.close();
1184                 goodTaxOut.close();
1185                 
1186                 if (m->control_pressed) {  m->mothurRemove(goodTaxFile);  return 0; }
1187                 
1188                 return 0;
1189                 
1190         }
1191         catch(exception& e) {
1192                 m->errorOut(e, "ScreenSeqsCommand", "screenTaxonomy");
1193                 exit(1);
1194         }
1195         
1196 }
1197 //***************************************************************************************************************
1198
1199 int ScreenSeqsCommand::screenQual(set<string> badSeqNames){
1200         try {
1201                 ifstream in;
1202                 m->openInputFile(qualfile, in);
1203                 set<string>::iterator it;
1204                 map<string, string> variables;
1205                 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(qualfile));
1206         variables["[extension]"] = m->getExtension(qualfile);
1207         string goodQualFile = getOutputFileName("qfile", variables);
1208                 
1209                 outputNames.push_back(goodQualFile);  outputTypes["qfile"].push_back(goodQualFile);
1210                 ofstream goodQual;      m->openOutputFile(goodQualFile, goodQual);
1211                 
1212                 while(!in.eof()){       
1213                         
1214                         if (m->control_pressed) { goodQual.close(); in.close(); m->mothurRemove(goodQualFile); return 0; }
1215
1216                         string saveName = "";
1217                         string name = "";
1218                         string scores = "";
1219                         
1220                         in >> name; 
1221                         
1222                         if (name.length() != 0) { 
1223                                 saveName = name.substr(1);
1224                                 while (!in.eof())       {       
1225                                         char c = in.get(); 
1226                                         if (c == 10 || c == 13){        break;  }
1227                                         else { name += c; }     
1228                                 } 
1229                                 m->gobble(in);
1230                         }
1231                         
1232                         while(in){
1233                                 char letter= in.get();
1234                                 if(letter == '>'){      in.putback(letter);     break;  }
1235                                 else{ scores += letter; }
1236                         }
1237                         
1238                         m->gobble(in);
1239                         
1240                         it = badSeqNames.find(saveName);
1241                         
1242                         if(it != badSeqNames.end()){
1243                                 badSeqNames.erase(it);
1244                         }else{                          
1245                                 goodQual << name << endl << scores;
1246                         }
1247                         
1248                         m->gobble(in);
1249                 }
1250                 
1251                 in.close();
1252                 goodQual.close();
1253                 
1254                 //we were unable to remove some of the bad sequences
1255                 if (badSeqNames.size() != 0) {
1256                         for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {  
1257                                 m->mothurOut("Your qual file does not include the sequence " + *it + " please correct."); 
1258                                 m->mothurOutEndLine();
1259                         }
1260                 }
1261                 
1262                 if (m->control_pressed) {  m->mothurRemove(goodQualFile);  return 0; }
1263                 
1264                 return 0;
1265                 
1266         }
1267         catch(exception& e) {
1268                 m->errorOut(e, "ScreenSeqsCommand", "screenQual");
1269                 exit(1);
1270         }
1271         
1272 }
1273 //**********************************************************************************************************************
1274
1275 int ScreenSeqsCommand::driver(linePair filePos, string goodFName, string badAccnosFName, string filename, set<string>& badSeqNames){
1276         try {
1277                 ofstream goodFile;
1278                 m->openOutputFile(goodFName, goodFile);
1279                 
1280                 ofstream badAccnosFile;
1281                 m->openOutputFile(badAccnosFName, badAccnosFile);
1282                 
1283                 ifstream inFASTA;
1284                 m->openInputFile(filename, inFASTA);
1285
1286                 inFASTA.seekg(filePos.start);
1287
1288                 bool done = false;
1289                 int count = 0;
1290         
1291                 while (!done) {
1292                 
1293                         if (m->control_pressed) {  return 0; }
1294                         
1295                         Sequence currSeq(inFASTA); m->gobble(inFASTA);
1296                         if (currSeq.getName() != "") {
1297                                 bool goodSeq = 1;               //      innocent until proven guilty
1298                                 if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())                  {       goodSeq = 0;    }
1299                                 if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())                                {       goodSeq = 0;    }
1300                                 if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())                {       goodSeq = 0;    }
1301                                 if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())   {       goodSeq = 0;    }
1302                                 if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())                {       goodSeq = 0;    }
1303                                 if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())                {       goodSeq = 0;    }
1304                                 
1305                                 if(goodSeq == 1){
1306                                         currSeq.printSequence(goodFile);        
1307                                 }
1308                                 else{
1309                                         badAccnosFile << currSeq.getName() << endl;
1310                                         badSeqNames.insert(currSeq.getName());
1311                                 }
1312                         count++;
1313                         }
1314                         
1315                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1316                                 unsigned long long pos = inFASTA.tellg();
1317                                 if ((pos == -1) || (pos >= filePos.end)) { break; }
1318                         #else
1319                                 if (inFASTA.eof()) { break; }
1320                         #endif
1321                         
1322                         //report progress
1323                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1324                 }
1325                 //report progress
1326                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
1327                 
1328                         
1329                 goodFile.close();
1330                 inFASTA.close();
1331                 badAccnosFile.close();
1332                 
1333                 return count;
1334         }
1335         catch(exception& e) {
1336                 m->errorOut(e, "ScreenSeqsCommand", "driver");
1337                 exit(1);
1338         }
1339 }
1340 //**********************************************************************************************************************
#ifdef USE_MPI
// MPI variant of the screening loop.
//
// Processes num sequences beginning at index start. Sequence i occupies the byte
// range [MPIPos[start+i], MPIPos[start+i+1]) of inMPI. Each sequence is checked
// against the enabled criteria (members that are not -1): startPos, endPos, maxAmbig,
// maxHomoP, minLength, maxLength. Accepted sequences are written to goodFile as
// ">name\naligned\n"; rejected names are written to badAccnosFile and inserted into
// badSeqNames. Both writes use MPI_File_write_shared.
//
// Returns 1 when the loop completes, or 0 if m->control_pressed is set.
int ScreenSeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& goodFile, MPI_File& badAccnosFile, vector<unsigned long long>& MPIPos, set<string>& badSeqNames){
	try {
		string outputString = "";
		MPI_Status statusGood;
		MPI_Status statusBadAccnos;
		MPI_Status status;
		int pid;
		MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are

		for(int i=0;i<num;i++){

			if (m->control_pressed) {  return 0; }

			//read next sequence
			int length = MPIPos[start+i+1] - MPIPos[start+i];

			char* buf4 = new char[length];

			MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);

			// the buffer is not NUL-terminated, so build the string from an explicit length;
			// array storage must be released with delete[]
			string tempBuf(buf4, length);
			delete[] buf4;
			istringstream iss (tempBuf,istringstream::in);

			Sequence currSeq(iss);

			//process seq
			if (currSeq.getName() != "") {
				bool goodSeq = 1;		//	innocent until proven guilty
				if(goodSeq == 1 && startPos != -1 && startPos < currSeq.getStartPos())			{	goodSeq = 0;	}
				if(goodSeq == 1 && endPos != -1 && endPos > currSeq.getEndPos())				{	goodSeq = 0;	}
				if(goodSeq == 1 && maxAmbig != -1 && maxAmbig < currSeq.getAmbigBases())		{	goodSeq = 0;	}
				if(goodSeq == 1 && maxHomoP != -1 && maxHomoP < currSeq.getLongHomoPolymer())	{	goodSeq = 0;	}
				if(goodSeq == 1 && minLength != -1 && minLength > currSeq.getNumBases())		{	goodSeq = 0;	}
				if(goodSeq == 1 && maxLength != -1 && maxLength < currSeq.getNumBases())		{	goodSeq = 0;	}

				if(goodSeq == 1){
					outputString =  ">" + currSeq.getName() + "\n" + currSeq.getAligned() + "\n";

					//print good seq
					length = outputString.length();
					char* buf2 = new char[length];
					memcpy(buf2, outputString.c_str(), length);

					MPI_File_write_shared(goodFile, buf2, length, MPI_CHAR, &statusGood);
					delete[] buf2;
				}
				else{

					badSeqNames.insert(currSeq.getName());

					//write to bad accnos file
					outputString = currSeq.getName() + "\n";

					length = outputString.length();
					char* buf3 = new char[length];
					memcpy(buf3, outputString.c_str(), length);

					MPI_File_write_shared(badAccnosFile, buf3, length, MPI_CHAR, &statusBadAccnos);
					delete[] buf3;
				}
			}

			//report progress
			if((i) % 100 == 0){	m->mothurOut("Processing sequence: " + toString(i)); m->mothurOutEndLine();		}
		}

		return 1;
	}
	catch(exception& e) {
		m->errorOut(e, "ScreenSeqsCommand", "driverMPI");
		exit(1);
	}
}
#endif
1417 /**************************************************************************************************/
1418
1419 int ScreenSeqsCommand::createProcesses(string goodFileName, string badAccnos, string filename, set<string>& badSeqNames) {
1420         try {
1421         
1422         vector<int> processIDS;   
1423         int process = 1;
1424                 int num = 0;
1425
1426 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1427                                 
1428                 //loop through and create all the processes you want
1429                 while (process != processors) {
1430                         int pid = fork();
1431                         
1432                         if (pid > 0) {
1433                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1434                                 process++;
1435                         }else if (pid == 0){
1436                                 num = driver(lines[process], goodFileName + toString(getpid()) + ".temp", badAccnos + toString(getpid()) + ".temp", filename, badSeqNames);
1437                                 
1438                                 //pass numSeqs to parent
1439                                 ofstream out;
1440                                 string tempFile = filename + toString(getpid()) + ".num.temp";
1441                                 m->openOutputFile(tempFile, out);
1442                                 out << num << endl;
1443                                 out.close();
1444                                 
1445                                 exit(0);
1446                         }else { 
1447                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1448                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1449                                 exit(0);
1450                         }
1451                 }
1452                 
1453         num = driver(lines[0], goodFileName, badAccnos, filename, badSeqNames);
1454         
1455                 //force parent to wait until all the processes are done
1456                 for (int i=0;i<processIDS.size();i++) { 
1457                         int temp = processIDS[i];
1458                         wait(&temp);
1459                 }
1460                 
1461                 for (int i = 0; i < processIDS.size(); i++) {
1462                         ifstream in;
1463                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
1464                         m->openInputFile(tempFile, in);
1465                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
1466                         in.close(); m->mothurRemove(tempFile);
1467             
1468             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
1469             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
1470                         
1471             m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
1472             m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
1473                 }
1474                 
1475         //read badSeqs in because root process doesnt know what other "bad" seqs the children found
1476         ifstream inBad;
1477         int ableToOpen = m->openInputFile(badAccnos, inBad, "no error");
1478         
1479         if (ableToOpen == 0) {
1480             badSeqNames.clear();
1481             string tempName;
1482             while (!inBad.eof()) {
1483                 inBad >> tempName; m->gobble(inBad);
1484                 badSeqNames.insert(tempName);
1485             }
1486             inBad.close();
1487         }
1488 #else
1489         
1490         //////////////////////////////////////////////////////////////////////////////////////////////////////
1491                 //Windows version shared memory, so be careful when passing variables through the sumScreenData struct. 
1492                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1493                 //Taking advantage of shared memory to allow both threads to add info to badSeqNames.
1494                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1495                 
1496                 vector<sumScreenData*> pDataArray; 
1497                 DWORD   dwThreadIdArray[processors-1];
1498                 HANDLE  hThreadArray[processors-1]; 
1499                 
1500                 //Create processor worker threads.
1501                 for( int i=0; i<processors-1; i++ ){
1502             
1503             string extension = "";
1504             if (i!=0) {extension += toString(i) + ".temp"; processIDS.push_back(i); }
1505             
1506                         // Allocate memory for thread data.
1507                         sumScreenData* tempSum = new sumScreenData(startPos, endPos, maxAmbig, maxHomoP, minLength, maxLength, filename, m, lines[i].start, lines[i].end,goodFileName+extension, badAccnos+extension);
1508                         pDataArray.push_back(tempSum);
1509                         
1510                         //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier
1511                         hThreadArray[i] = CreateThread(NULL, 0, MySumScreenThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1512                 }
1513                 
1514         //do your part
1515         num = driver(lines[processors-1], (goodFileName+toString(processors-1)+".temp"), (badAccnos+toString(processors-1)+".temp"), filename, badSeqNames);
1516         processIDS.push_back(processors-1);
1517         
1518                 //Wait until all threads have terminated.
1519                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
1520                 
1521                 //Close all thread handles and free memory allocations.
1522                 for(int i=0; i < pDataArray.size(); i++){
1523                         num += pDataArray[i]->count;
1524             if (pDataArray[i]->count != pDataArray[i]->end) {
1525                 m->mothurOut("[ERROR]: process " + toString(i) + " only processed " + toString(pDataArray[i]->count) + " of " + toString(pDataArray[i]->end) + " sequences assigned to it, quitting. \n"); m->control_pressed = true; 
1526             }
1527             for (set<string>::iterator it = pDataArray[i]->badSeqNames.begin(); it != pDataArray[i]->badSeqNames.end(); it++) { badSeqNames.insert(*it);       }
1528                         CloseHandle(hThreadArray[i]);
1529                         delete pDataArray[i];
1530                 }
1531         
1532         for (int i = 0; i < processIDS.size(); i++) {
1533             m->appendFiles((goodFileName + toString(processIDS[i]) + ".temp"), goodFileName);
1534             m->mothurRemove((goodFileName + toString(processIDS[i]) + ".temp"));
1535                         
1536             m->appendFiles((badAccnos + toString(processIDS[i]) + ".temp"), badAccnos);
1537             m->mothurRemove((badAccnos + toString(processIDS[i]) + ".temp"));
1538                 }
1539
1540 #endif  
1541         
1542         return num;
1543         
1544         }
1545         catch(exception& e) {
1546                 m->errorOut(e, "ScreenSeqsCommand", "createProcesses");
1547                 exit(1);
1548         }
1549 }
1550
1551 //***************************************************************************************************************
1552
1553