]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13
14 //**********************************************************************************************************************
15 vector<string> FilterSeqsCommand::setParameters(){      
16         try {
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter phard("hard", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(phard);
19                 CommandParameter ptrump("trump", "String", "", "*", "", "", "",false,false); parameters.push_back(ptrump);
20                 CommandParameter psoft("soft", "Number", "", "0", "", "", "",false,false); parameters.push_back(psoft);
21                 CommandParameter pvertical("vertical", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pvertical);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "FilterSeqsCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string FilterSeqsCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n";
40                 helpString += "The filter.seqs command parameters are fasta, trump, soft, hard, processors and vertical. \n";
41                 helpString += "The fasta parameter is required, unless you have a valid current fasta file. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n";
42                 helpString += "For example: fasta=abrecovery.fasta-amazon.fasta \n";
43                 helpString += "The trump option will remove a column if the trump character is found at that position in any sequence of the alignment. Default=*, meaning no trump. \n";
44                 helpString += "A soft mask removes any column where the dominant base (i.e. A, T, G, C, or U) does not occur in at least a designated percentage of sequences. Default=0.\n";
45                 helpString += "The hard parameter allows you to enter a file containing the filter you want to use.\n";
46                 helpString += "The vertical parameter removes columns where all sequences contain a gap character. The default is T.\n";
47                 helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
48                 helpString += "The filter.seqs command should be in the following format: \n";
49                 helpString += "filter.seqs(fasta=yourFastaFile, trump=yourTrump) \n";
50                 helpString += "Example filter.seqs(fasta=abrecovery.fasta, trump=.).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "FilterSeqsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string FilterSeqsCommand::getOutputFileNameTag(string type, string inputName=""){       
61         try {
62         string outputFileName = "";
63                 map<string, vector<string> >::iterator it;
64         
65         //is this a type this command creates
66         it = outputTypes.find(type);
67         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
68         else {
69             if (type == "fasta") {  outputFileName =  "filter.fasta"; }
70             else if (type == "filter") {  outputFileName =  "filter"; }
71             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
72         }
73         return outputFileName;
74         }
75         catch(exception& e) {
76                 m->errorOut(e, "FilterSeqsCommand", "getOutputFileNameTag");
77                 exit(1);
78         }
79 }
80 //**********************************************************************************************************************
81 FilterSeqsCommand::FilterSeqsCommand(){ 
82         try {
83                 abort = true; calledHelp = true; 
84                 setParameters();
85                 vector<string> tempOutNames;
86                 outputTypes["fasta"] = tempOutNames;
87                 outputTypes["filter"] = tempOutNames;
88         }
89         catch(exception& e) {
90                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
91                 exit(1);
92         }
93 }
94 /**************************************************************************************/
95 FilterSeqsCommand::FilterSeqsCommand(string option)  {
96         try {
97                 abort = false; calledHelp = false;   
98                 filterFileName = "";
99                 
100                 //allow user to run help
101                 if(option == "help") { help(); abort = true; calledHelp = true; }
102                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
103                 
104                 else {
105                         vector<string> myArray = setParameters();
106                         
107                         OptionParser parser(option);
108                         map<string,string> parameters = parser.getParameters();
109                         
110                         ValidParameters validParameter("filter.seqs");
111                         map<string,string>::iterator it;
112                         
113                         //check to make sure all parameters are valid for command
114                         for (it = parameters.begin(); it != parameters.end(); it++) { 
115                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
116                         }
117                         
118                         //initialize outputTypes
119                         vector<string> tempOutNames;
120                         outputTypes["fasta"] = tempOutNames;
121                         outputTypes["filter"] = tempOutNames;
122                 
123                         //if the user changes the input directory command factory will send this info to us in the output parameter 
124                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
125                         if (inputDir == "not found"){   inputDir = "";          }
126                         else {
127                                 string path;
128                                 it = parameters.find("fasta");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
134                                 }
135                                 
136                                 it = parameters.find("hard");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
142                                 }
143                         }
144                         
145                         //check for required parameters
146                         fasta = validParameter.validFile(parameters, "fasta", false);
147                         if (fasta == "not found") {                             
148                                 fasta = m->getFastaFile(); 
149                                 if (fasta != "") { 
150                     fastafileNames.push_back(fasta);  
151                     m->mothurOut("Using " + fasta + " as input file for the fasta parameter."); m->mothurOutEndLine();
152                     string simpleName = m->getSimpleName(fasta);
153                     filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
154                 }
155                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
156                         }
157                         else { 
158                                 m->splitAtDash(fasta, fastafileNames);
159                                 
160                                 //go through files and make sure they are good, if not, then disregard them
161                                 for (int i = 0; i < fastafileNames.size(); i++) {
162                                         
163                                         bool ignore = false;
164                                         if (fastafileNames[i] == "current") { 
165                                                 fastafileNames[i] = m->getFastaFile(); 
166                                                 if (fastafileNames[i] != "") {  m->mothurOut("Using " + fastafileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
167                                                 else {  
168                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
169                                                         //erase from file list
170                                                         fastafileNames.erase(fastafileNames.begin()+i);
171                                                         i--;
172                                                 }
173                                         }
174                                         
175                                         if (!ignore) {
176                                                 if (inputDir != "") {
177                                                         string path = m->hasPath(fastafileNames[i]);
178                                                         //if the user has not given a path then, add inputdir. else leave path alone.
179                                                         if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
180                                                 }
181
182                                                 ifstream in;
183                                                 int ableToOpen = m->openInputFile(fastafileNames[i], in, "noerror");
184                                         
185                                                 //if you can't open it, try default location
186                                                 if (ableToOpen == 1) {
187                                                         if (m->getDefaultPath() != "") { //default path is set
188                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastafileNames[i]);
189                                                                 m->mothurOut("Unable to open " + fastafileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
190                                                                 ifstream in2;
191                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
192                                                                 in2.close();
193                                                                 fastafileNames[i] = tryPath;
194                                                         }
195                                                 }
196                                                 
197                                                 //if you can't open it, try default location
198                                                 if (ableToOpen == 1) {
199                                                         if (m->getOutputDir() != "") { //default path is set
200                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastafileNames[i]);
201                                                                 m->mothurOut("Unable to open " + fastafileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
202                                                                 ifstream in2;
203                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
204                                                                 in2.close();
205                                                                 fastafileNames[i] = tryPath;
206                                                         }
207                                                 }
208                                                 
209                                                 in.close();
210                                                 
211                                                 if (ableToOpen == 1) { 
212                                                         m->mothurOut("Unable to open " + fastafileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
213                                                         //erase from file list
214                                                         fastafileNames.erase(fastafileNames.begin()+i);
215                                                         i--;
216                                                 }else{  
217                                                         string simpleName = m->getSimpleName(fastafileNames[i]);
218                                                         filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
219                                                         m->setFastaFile(fastafileNames[i]);
220                                                 }
221                                                 in.close();
222                                         }
223                                 }
224                                 
225                                 //make sure there is at least one valid file left
226                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
227                         }
228                         
229                         if (!abort) {
230                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
231                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
232                                         outputDir = ""; 
233                                         outputDir += m->hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it       
234                                 }
235                         }
236                         //check for optional parameter and set defaults
237                         // ...at some point should added some additional type checking...
238                         
239                         string temp;
240                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
241                         else if (hard == "not open") { hard = ""; abort = true; }       
242
243                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
244                         trump = temp[0];
245                         
246                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
247                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
248                         
249                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
250                         m->setProcessors(temp);
251                         m->mothurConvert(temp, processors); 
252                         
253                         vertical = validParameter.validFile(parameters, "vertical", false);             
254                         if (vertical == "not found") { 
255                                 if ((hard == "") && (trump == '*') && (soft == 0)) { vertical = "T"; } //you have not given a hard file or set the trump char.
256                                 else { vertical = "F";  }
257                         }
258                         
259                         numSeqs = 0;
260                 }
261                 
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
265                 exit(1);
266         }
267 }
268 /**************************************************************************************/
269
270 int FilterSeqsCommand::execute() {      
271         try {
272         
273                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
274                 
275                 ifstream inFASTA;
276                 m->openInputFile(fastafileNames[0], inFASTA);
277                 
278                 Sequence testSeq(inFASTA);
279                 alignmentLength = testSeq.getAlignLength();
280                 inFASTA.close();
281                 
282                 ////////////create filter/////////////////
283                 m->mothurOut("Creating Filter... "); m->mothurOutEndLine();
284                 
285                 filter = createFilter();
286                 
287                 m->mothurOutEndLine();  m->mothurOutEndLine();
288                 
289                 if (m->control_pressed) { outputTypes.clear(); return 0; }
290                 
291                 #ifdef USE_MPI
292                         int pid;
293                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
294                                         
295                         if (pid == 0) { //only one process should output the filter
296                 #endif
297                 
298                 ofstream outFilter;
299                 
300                 //prevent giantic file name
301                 string filterFile;
302                 if (fastafileNames.size() > 3) { filterFile = outputDir + "merge." + getOutputFileNameTag("filter"); }
303                 else {  filterFile = outputDir + filterFileName + "." + getOutputFileNameTag("filter");  }
304                 
305                 m->openOutputFile(filterFile, outFilter);
306                 outFilter << filter << endl;
307                 outFilter.close();
308                 outputNames.push_back(filterFile); outputTypes["filter"].push_back(filterFile);
309                 
310                 #ifdef USE_MPI
311                         }
312                 #endif
313                 
314                 ////////////run filter/////////////////
315                 
316                 m->mothurOut("Running Filter... "); m->mothurOutEndLine();
317                 
318                 filterSequences();
319                 
320                 m->mothurOutEndLine();  m->mothurOutEndLine();
321                                         
322                 int filteredLength = 0;
323                 for(int i=0;i<alignmentLength;i++){
324                         if(filter[i] == '1'){   filteredLength++;       }
325                 }
326                 
327                 if (m->control_pressed) {  outputTypes.clear(); for(int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }  return 0; }
328
329                 
330                 m->mothurOutEndLine();
331                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
332                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
333                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
334                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
335                 
336                 //set fasta file as new current fastafile
337                 string current = "";
338                 itTypes = outputTypes.find("fasta");
339                 if (itTypes != outputTypes.end()) {
340                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
341                 }
342                 
343                 m->mothurOutEndLine();
344                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
345                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
346                 m->mothurOutEndLine();
347                 
348                 return 0;
349                 
350         }
351         catch(exception& e) {
352                 m->errorOut(e, "FilterSeqsCommand", "execute");
353                 exit(1);
354         }
355 }
356 /**************************************************************************************/
357 int FilterSeqsCommand::filterSequences() {      
358         try {
359                 
360                 numSeqs = 0;
361                 
362                 for (int s = 0; s < fastafileNames.size(); s++) {
363                         
364                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
365                                 
366                                 string filteredFasta = outputDir + m->getRootName(m->getSimpleName(fastafileNames[s])) + getOutputFileNameTag("fasta");
367 #ifdef USE_MPI  
368                                 int pid, numSeqsPerProcessor, num; 
369                                 int tag = 2001;
370                                 vector<unsigned long long>MPIPos;
371                                                 
372                                 MPI_Status status; 
373                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
374                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
375                                 
376                                 MPI_File outMPI;
377                                 MPI_File inMPI;
378                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
379                                 int inMode=MPI_MODE_RDONLY; 
380                                 
381                                 char outFilename[1024];
382                                 strcpy(outFilename, filteredFasta.c_str());
383                         
384                                 char inFileName[1024];
385                                 strcpy(inFileName, fastafileNames[s].c_str());
386                                 
387                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
388                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
389
390                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
391
392                                 if (pid == 0) { //you are the root process 
393                                         
394                                         MPIPos = m->setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs
395                                         numSeqs += num;
396                                         
397                                         //send file positions to all processes
398                                         for(int i = 1; i < processors; i++) { 
399                                                 MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
400                                                 MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
401                                         }
402                                         
403                                         //figure out how many sequences you have to do
404                                         numSeqsPerProcessor = num / processors;
405                                         int startIndex =  pid * numSeqsPerProcessor;
406                                         if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
407                                         
408                                 
409                                         //do your part
410                                         driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
411                                         
412                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
413                                         
414                                         //wait on chidren
415                                         for(int i = 1; i < processors; i++) { 
416                                                 char buf[5];
417                                                 MPI_Recv(buf, 5, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
418                                         }
419                                         
420                                 }else { //you are a child process
421                                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
422                                         MPIPos.resize(num+1);
423                                         numSeqs += num;
424                                         MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
425                                         
426                                         //figure out how many sequences you have to align
427                                         numSeqsPerProcessor = num / processors;
428                                         int startIndex =  pid * numSeqsPerProcessor;
429                                         if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
430                                         
431                                         
432                                         //align your part
433                                         driverMPIRun(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);           
434                                         
435                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);  return 0;  }
436                                         
437                                         char buf[5];
438                                         strcpy(buf, "done"); 
439                                         
440                                         //tell parent you are done.
441                                         MPI_Send(buf, 5, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
442                                 }
443                                 
444                                 MPI_File_close(&outMPI);
445                                 MPI_File_close(&inMPI);
446                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
447                                 
448 #else
449             
450             vector<unsigned long long> positions;
451             if (savedPositions.size() != 0) { positions = savedPositions[s]; }
452             else {
453 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
454                                 positions = m->divideFile(fastafileNames[s], processors);
455 #else
456                 if(processors != 1){
457                     int numFastaSeqs = 0;
458                     positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
459                     if (positions.size() < processors) { processors = positions.size(); }
460                 }
461 #endif
462             }
463                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
464                         //vector<unsigned long long> positions = m->divideFile(fastafileNames[s], processors);
465                         
466                         for (int i = 0; i < (positions.size()-1); i++) {
467                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
468                         }       
469                         
470                                 if(processors == 1){
471                                         int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
472                                         numSeqs += numFastaSeqs;
473                                 }else{
474                                         int numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); 
475                                         numSeqs += numFastaSeqs;
476                                 }
477                                 
478                                 if (m->control_pressed) {  return 1; }
479                 #else
480             if(processors == 1){
481                 lines.push_back(new linePair(0, 1000));
482                                 int numFastaSeqs = driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
483                                 numSeqs += numFastaSeqs;
484             }else {
485                 int numFastaSeqs = positions.size()-1;
486                 //positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
487                 
488                 //figure out how many sequences you have to process
489                 int numSeqsPerProcessor = numFastaSeqs / processors;
490                 for (int i = 0; i < processors; i++) {
491                     int startIndex =  i * numSeqsPerProcessor;
492                     if(i == (processors - 1)){  numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
493                     lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
494                 }
495                 
496                 numFastaSeqs = createProcessesRunFilter(filter, fastafileNames[s], filteredFasta); 
497                 numSeqs += numFastaSeqs;
498             }
499
500                                 if (m->control_pressed) {  return 1; }
501                 #endif
502 #endif
503                         outputNames.push_back(filteredFasta); outputTypes["fasta"].push_back(filteredFasta);
504                 }
505
506                 return 0;
507         }
508         catch(exception& e) {
509                 m->errorOut(e, "FilterSeqsCommand", "filterSequences");
510                 exit(1);
511         }
512 }
513 #ifdef USE_MPI
514 /**************************************************************************************/
515 int FilterSeqsCommand::driverMPIRun(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long long>& MPIPos) {        
516         try {
517                 string outputString = "";
518                 int count = 0;
519                 MPI_Status status; 
520                 
521                 for(int i=0;i<num;i++){
522                 
523                         if (m->control_pressed) { return 0; }
524                 
525                         //read next sequence
526                         int length = MPIPos[start+i+1] - MPIPos[start+i];
527                         char* buf4 = new char[length];
528                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
529                         
530                         string tempBuf = buf4;
531                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
532                         istringstream iss (tempBuf,istringstream::in);
533                         delete buf4;
534         
535                         Sequence seq(iss);  m->gobble(iss);
536                         
537                         if (seq.getName() != "") {
538                                 string align = seq.getAligned();
539                                 string filterSeq = "";
540                                         
541                                 for(int j=0;j<alignmentLength;j++){
542                                         if(filter[j] == '1'){
543                                                 filterSeq += align[j];
544                                         }
545                                 }
546                                 
547                                 count++;
548                                 outputString += ">" + seq.getName() + "\n" + filterSeq + "\n";
549                                 
550                                 if(count % 10 == 0){ //output to file 
551                                         //send results to parent
552                                         int length = outputString.length();
553                                         char* buf = new char[length];
554                                         memcpy(buf, outputString.c_str(), length);
555                                 
556                                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
557                                         outputString = "";
558                                         delete buf;
559                                 }
560
561                         }
562                         
563                         if((i+1) % 100 == 0){   cout << (i+1) << endl;   m->mothurOutJustToLog(toString(i+1) + "\n");   }
564                 }
565                 
566                 if(outputString != ""){ //output to file 
567                         //send results to parent
568                         int length = outputString.length();
569                         char* buf = new char[length];
570                         memcpy(buf, outputString.c_str(), length);
571                         
572                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
573                         outputString = "";
574                         delete buf;
575                 }
576                 
577                 if((num) % 100 != 0){   cout << (num) << endl;   m->mothurOutJustToLog(toString(num) + "\n");   }
578                         
579                 return 0;
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
583                 exit(1);
584         }
585 }
586 #endif
587 /**************************************************************************************/
588 int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* filePos) {      
589         try {
590                 ofstream out;
591                 m->openOutputFile(outputFilename, out);
592                 
593                 ifstream in;
594                 m->openInputFile(inputFilename, in);
595                                 
596                 in.seekg(filePos->start);
597
598                 bool done = false;
599                 int count = 0;
600         
601                 while (!done) {
602                                 
603                                 if (m->control_pressed) { in.close(); out.close(); return 0; }
604                                 
605                                 Sequence seq(in); m->gobble(in);
606                                 if (seq.getName() != "") {
607                                         string align = seq.getAligned();
608                                         string filterSeq = "";
609                                         
610                                         for(int j=0;j<alignmentLength;j++){
611                                                 if(filter[j] == '1'){
612                                                         filterSeq += align[j];
613                                                 }
614                                         }
615                                         
616                                         out << '>' << seq.getName() << endl << filterSeq << endl;
617                                 count++;
618                         }
619                         
620                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
621                                 unsigned long long pos = in.tellg();
622                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
623                         #else
624                                 if (in.eof()) { break; }
625                         #endif
626                         
627                         //report progress
628                         if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
629                 }
630                 //report progress
631                 if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
632                 
633                 
634                 out.close();
635                 in.close();
636                 
637                 return count;
638         }
639         catch(exception& e) {
640                 m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
641                 exit(1);
642         }
643 }
644 /**************************************************************************************************/
645
646 int FilterSeqsCommand::createProcessesRunFilter(string F, string filename, string filteredFastaName) {
647         try {
648         
649         int process = 1;
650                 int num = 0;
651                 processIDS.clear();
652         
653 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
654                 
655                 
656                 //loop through and create all the processes you want
657                 while (process != processors) {
658                         int pid = fork();
659                         
660                         if (pid > 0) {
661                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
662                                 process++;
663                         }else if (pid == 0){
664                                 string filteredFasta = filename + toString(getpid()) + ".temp";
665                                 num = driverRunFilter(F, filteredFasta, filename, lines[process]);
666                                 
667                                 //pass numSeqs to parent
668                                 ofstream out;
669                                 string tempFile = filename +  toString(getpid()) + ".num.temp";
670                                 m->openOutputFile(tempFile, out);
671                                 out << num << endl;
672                                 out.close();
673                                 
674                                 exit(0);
675                         }else { 
676                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
677                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
678                                 exit(0);
679                         }
680                 }
681                 
682         num = driverRunFilter(F, filteredFastaName, filename, lines[0]);
683         
684                 //force parent to wait until all the processes are done
685                 for (int i=0;i<processIDS.size();i++) { 
686                         int temp = processIDS[i];
687                         wait(&temp);
688                 }       
689                                         
690                 for (int i = 0; i < processIDS.size(); i++) {
691                         ifstream in;
692                         string tempFile =  filename + toString(processIDS[i]) + ".num.temp";
693                         m->openInputFile(tempFile, in);
694                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
695                         in.close(); m->mothurRemove(tempFile);
696             
697             m->appendFiles((filename + toString(processIDS[i]) + ".temp"), filteredFastaName);
698             m->mothurRemove((filename + toString(processIDS[i]) + ".temp"));
699                 }
700                
701 #else
702         
703         //////////////////////////////////////////////////////////////////////////////////////////////////////
704                 //Windows version shared memory, so be careful when passing variables through the filterData struct. 
705                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
706                 //Taking advantage of shared memory to allow both threads to add info to F.
707                 //////////////////////////////////////////////////////////////////////////////////////////////////////
708                 
709                 vector<filterRunData*> pDataArray; 
710                 DWORD   dwThreadIdArray[processors-1];
711                 HANDLE  hThreadArray[processors-1]; 
712                 
713                 //Create processor worker threads.
714                 for( int i=0; i<processors-1; i++){
715                         
716             string extension = "";
717                         if (i != 0) { extension = toString(i) + ".temp"; }
718             
719                         filterRunData* tempFilter = new filterRunData(filter, filename, (filteredFastaName + extension), m, lines[i]->start, lines[i]->end, alignmentLength, i);
720                         pDataArray.push_back(tempFilter);
721                         processIDS.push_back(i);
722             
723                         hThreadArray[i] = CreateThread(NULL, 0, MyRunFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
724                 }
725         
726         num = driverRunFilter(F, (filteredFastaName + toString(processors-1) + ".temp"), filename, lines[processors-1]);
727         
728                 //Wait until all threads have terminated.
729                 WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
730                 
731                 //Close all thread handles and free memory allocations.
732                 for(int i=0; i < pDataArray.size(); i++){
733                         num += pDataArray[i]->count;
734             CloseHandle(hThreadArray[i]);
735                         delete pDataArray[i];
736                 }
737         
738         for (int i = 1; i < processors; i++) {
739             m->appendFiles((filteredFastaName + toString(i) + ".temp"), filteredFastaName);
740             m->mothurRemove((filteredFastaName + toString(i) + ".temp"));
741                 }
742 #endif  
743         
744         return num;
745         
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "FilterSeqsCommand", "createProcessesRunFilter");
749                 exit(1);
750         }
751 }
752 /**************************************************************************************/
753 string FilterSeqsCommand::createFilter() {      
754         try {
755                 string filterString = "";                       
756                 Filters F;
757                 
758                 if (soft != 0)                  {  F.setSoft(soft);             }
759                 if (trump != '*')               {  F.setTrump(trump);   }
760                 
761                 F.setLength(alignmentLength);
762                 
763                 if(trump != '*' || m->isTrue(vertical) || soft != 0){
764                         F.initialize();
765                 }
766                 
767                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
768                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
769                 
770                 numSeqs = 0;
771                 if(trump != '*' || m->isTrue(vertical) || soft != 0){
772                         for (int s = 0; s < fastafileNames.size(); s++) {
773                         
774                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
775                         
776 #ifdef USE_MPI  
777                                 int pid, numSeqsPerProcessor, num; 
778                                 int tag = 2001;
779                                 vector<unsigned long long> MPIPos;
780                                 
781                                 MPI_Status status; 
782                                 MPI_File inMPI; 
783                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
784                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
785                                                         
786                                 //char* tempFileName = new char(fastafileNames[s].length());
787                                 //tempFileName = &(fastafileNames[s][0]);
788                                 
789                                 char tempFileName[1024];
790                                 strcpy(tempFileName, fastafileNames[s].c_str());
791                 
792                                 MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
793                                 
794                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
795                                 
796                                 if (pid == 0) { //you are the root process
797                                                 MPIPos = m->setFilePosFasta(fastafileNames[s], num); //fills MPIPos, returns numSeqs
798                                                 numSeqs += num;
799                                                 
800                                                 //send file positions to all processes
801                                                 for(int i = 1; i < processors; i++) { 
802                                                         MPI_Send(&num, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
803                                                         MPI_Send(&MPIPos[0], (num+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
804                                                 }
805                                                                 
806                                                 //figure out how many sequences you have to do
807                                                 numSeqsPerProcessor = num / processors;
808                                                 int startIndex =  pid * numSeqsPerProcessor;
809                                                 if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
810                                                 
811                                 
812                                                 //do your part
813                                                 MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI, MPIPos);
814                                                 
815                                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
816                                                                                                 
817                                 }else { //i am the child process
818                                         MPI_Recv(&num, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
819                                         MPIPos.resize(num+1);
820                                         numSeqs += num;
821                                         MPI_Recv(&MPIPos[0], (num+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
822                                         
823                                         //figure out how many sequences you have to align
824                                         numSeqsPerProcessor = num / processors;
825                                         int startIndex =  pid * numSeqsPerProcessor;
826                                         if(pid == (processors - 1)){    numSeqsPerProcessor = num - pid * numSeqsPerProcessor;  }
827                                         
828                                         
829                                         //do your part
830                                         MPICreateFilter(startIndex, numSeqsPerProcessor, F, inMPI,  MPIPos);
831                                         
832                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  return 0;  }
833                                 }
834                                 
835                                 MPI_File_close(&inMPI);
836                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
837                                 
838 #else
839                                 
840                 vector<unsigned long long> positions;
841                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
842                                 positions = m->divideFile(fastafileNames[s], processors);
843                                 for (int i = 0; i < (positions.size()-1); i++) {
844                                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
845                                 }       
846                                 
847                                 if(processors == 1){
848                                         int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
849                                         numSeqs += numFastaSeqs;
850                                 }else{
851                                         int numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]); 
852                                         numSeqs += numFastaSeqs;
853                                 }
854                 #else
855                 if(processors == 1){
856                     lines.push_back(new linePair(0, 1000));
857                     int numFastaSeqs = driverCreateFilter(F, fastafileNames[s], lines[0]);
858                     numSeqs += numFastaSeqs;
859                                 }else {
860                     int numFastaSeqs = 0;
861                     positions = m->setFilePosFasta(fastafileNames[s], numFastaSeqs); 
862                     if (positions.size() < processors) { processors = positions.size(); }
863                     
864                     //figure out how many sequences you have to process
865                     int numSeqsPerProcessor = numFastaSeqs / processors;
866                     for (int i = 0; i < processors; i++) {
867                         int startIndex =  i * numSeqsPerProcessor;
868                         if(i == (processors - 1)){      numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;   }
869                         lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor));
870                     }
871                     
872                     numFastaSeqs = createProcessesCreateFilter(F, fastafileNames[s]); 
873                                         numSeqs += numFastaSeqs;
874                 }
875                 #endif
876                 //save the file positions so we can reuse them in the runFilter function
877                 savedPositions[s] = positions;
878                 
879                                 if (m->control_pressed) {  return filterString; }
880 #endif
881                         
882                         }
883                 }
884
885
886 #ifdef USE_MPI  
887                 int pid;
888                 int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
889                 MPI_Status status;
890                 
891                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
892                 
893                 if(trump != '*' || m->isTrue(vertical) || soft != 0){
894                         
895                         if (pid == 0) { //only one process should output the filter
896                         
897                                 vector<int> temp; temp.resize(alignmentLength+1);
898                                                                 
899                                 //get the frequencies from the child processes
900                                 for(int i = 1; i < processors; i++) { 
901                                 
902                                         for (int j = 0; j < 5; j++) {
903                                         
904                                                 MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, i, 2001, MPI_COMM_WORLD, &status); 
905                                                 int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
906                                                 
907                                                 if (receiveTag == Atag) { //you are recieveing the A frequencies
908                                                         for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
909                                                 }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
910                                                         for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
911                                                 }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
912                                                         for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
913                                                 }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
914                                                         for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
915                                                 }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
916                                                         for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
917                                                 }
918                                         }
919                                 } 
920                         }else{
921                         
922                                 //send my fequency counts
923                                 F.a.push_back(Atag);
924                                 int ierr = MPI_Send(&(F.a[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
925                                 F.t.push_back(Ttag);
926                                 ierr = MPI_Send (&(F.t[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
927                                 F.c.push_back(Ctag);
928                                 ierr = MPI_Send(&(F.c[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
929                                 F.g.push_back(Gtag);
930                                 ierr = MPI_Send(&(F.g[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
931                                 F.gap.push_back(Gaptag);
932                                 ierr = MPI_Send(&(F.gap[0]), (alignmentLength+1), MPI_INT, 0, 2001, MPI_COMM_WORLD);
933                         }
934                         
935                 }
936                 
937                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
938                 
939                 if (pid == 0) { //only one process should output the filter
940 #endif
941
942                 F.setNumSeqs(numSeqs);
943                 if(m->isTrue(vertical) == 1)    {       F.doVertical(); }
944                 if(soft != 0)                           {       F.doSoft();             }
945                 filterString = F.getFilter();
946                 
947 #ifdef USE_MPI
948                 //send filter string to kids
949                 //for(int i = 1; i < processors; i++) { 
950                 //      MPI_Send(&filterString[0], alignmentLength, MPI_CHAR, i, 2001, MPI_COMM_WORLD);
951                 //}
952                 MPI_Bcast(&filterString[0], alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
953         }else{
954                 //recieve filterString
955                 char* tempBuf = new char[alignmentLength];
956                 //MPI_Recv(&tempBuf[0], alignmentLength, MPI_CHAR, 0, 2001, MPI_COMM_WORLD, &status);
957                 MPI_Bcast(tempBuf, alignmentLength, MPI_CHAR, 0, MPI_COMM_WORLD);
958                 
959                 filterString = tempBuf;
960                 if (filterString.length() > alignmentLength) { filterString = filterString.substr(0, alignmentLength);  }
961                 delete tempBuf; 
962         }
963         
964         MPI_Barrier(MPI_COMM_WORLD);
965 #endif
966             
967                 return filterString;
968         }
969         catch(exception& e) {
970                 m->errorOut(e, "FilterSeqsCommand", "createFilter");
971                 exit(1);
972         }
973 }
974 /**************************************************************************************/
975 int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* filePos) {     
976         try {
977                 
978                 ifstream in;
979                 m->openInputFile(filename, in);
980                                 
981                 in.seekg(filePos->start);
982
983                 bool done = false;
984                 int count = 0;
985         
986                 while (!done) {
987                                 
988                         if (m->control_pressed) { in.close(); return 1; }
989                                         
990                         Sequence seq(in); m->gobble(in);
991                         if (seq.getName() != "") {
992                                         if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
993                                         
994                                         if(trump != '*')                        {       F.doTrump(seq);         }
995                                         if(m->isTrue(vertical) || soft != 0)    {       F.getFreqs(seq);        }
996                                         cout.flush();
997                                         count++;
998                         }
999                         
1000                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1001                                 unsigned long long pos = in.tellg();
1002                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
1003                         #else
1004                                 if (in.eof()) { break; }
1005                         #endif
1006                         
1007                         //report progress
1008                         if((count) % 100 == 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1009                 }
1010                 //report progress
1011                 if((count) % 100 != 0){ m->mothurOut(toString(count)); m->mothurOutEndLine();           }
1012                 in.close();
1013                 
1014                 return count;
1015         }
1016         catch(exception& e) {
1017                 m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
1018                 exit(1);
1019         }
1020 }
1021 #ifdef USE_MPI
1022 /**************************************************************************************/
1023 int FilterSeqsCommand::MPICreateFilter(int start, int num, Filters& F, MPI_File& inMPI, vector<unsigned long long>& MPIPos) {   
1024         try {
1025                 
1026                 MPI_Status status; 
1027                 int pid;
1028                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
1029                 
1030                 for(int i=0;i<num;i++){
1031                         
1032                         if (m->control_pressed) { return 0; }
1033                         
1034                         //read next sequence
1035                         int length = MPIPos[start+i+1] - MPIPos[start+i];
1036         
1037                         char* buf4 = new char[length];
1038                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
1039                         
1040                         string tempBuf = buf4;
1041                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
1042                         istringstream iss (tempBuf,istringstream::in);
1043                         delete buf4;
1044
1045                         Sequence seq(iss);  
1046
1047                         if (seq.getAligned().length() != alignmentLength) {  cout << "Alignment length is " << alignmentLength << " and sequence " << seq.getName() << " has length " << seq.getAligned().length() << ", please correct." << endl; exit(1);  }
1048                         
1049                         if(trump != '*'){       F.doTrump(seq); }
1050                         if(m->isTrue(vertical) || soft != 0){   F.getFreqs(seq);        }
1051                         cout.flush();
1052                                                 
1053                         //report progress
1054                         if((i+1) % 100 == 0){   cout << (i+1) << endl;   m->mothurOutJustToLog(toString(i+1) + "\n");   }
1055                 }
1056                 
1057                 //report progress
1058                 if((num) % 100 != 0){   cout << num << endl; m->mothurOutJustToLog(toString(num) + "\n");       }
1059                 
1060                 return 0;
1061         }
1062         catch(exception& e) {
1063                 m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
1064                 exit(1);
1065         }
1066 }
1067 #endif
1068 /**************************************************************************************************/
1069
1070 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
1071         try {
1072         int process = 1;
1073                 int num = 0;
1074                 processIDS.clear();
1075
1076 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
1077                                 
1078                 //loop through and create all the processes you want
1079                 while (process != processors) {
1080                         int pid = fork();
1081                         
1082                         if (pid > 0) {
1083                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
1084                                 process++;
1085                         }else if (pid == 0){
1086                                 //reset child's filter counts to 0;
1087                                 F.a.clear(); F.a.resize(alignmentLength, 0);
1088                                 F.t.clear(); F.t.resize(alignmentLength, 0);
1089                                 F.g.clear(); F.g.resize(alignmentLength, 0);
1090                                 F.c.clear(); F.c.resize(alignmentLength, 0);
1091                                 F.gap.clear(); F.gap.resize(alignmentLength, 0);
1092                                 
1093                                 num = driverCreateFilter(F, filename, lines[process]);
1094                                 
1095                                 //write out filter counts to file
1096                                 filename += toString(getpid()) + "filterValues.temp";
1097                                 ofstream out;
1098                                 m->openOutputFile(filename, out);
1099                                 
1100                                 out << num << endl;
1101                                 out << F.getFilter() << endl;
1102                                 for (int k = 0; k < alignmentLength; k++) {             out << F.a[k] << '\t'; }  out << endl;
1103                                 for (int k = 0; k < alignmentLength; k++) {             out << F.t[k] << '\t'; }  out << endl;
1104                                 for (int k = 0; k < alignmentLength; k++) {             out << F.g[k] << '\t'; }  out << endl;
1105                                 for (int k = 0; k < alignmentLength; k++) {             out << F.c[k] << '\t'; }  out << endl;
1106                                 for (int k = 0; k < alignmentLength; k++) {             out << F.gap[k] << '\t'; }  out << endl;
1107
1108                                 //cout << F.getFilter() << endl;
1109                                 out.close();
1110                                 
1111                                 exit(0);
1112                         }else { 
1113                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
1114                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
1115                                 exit(0);
1116                         }
1117                 }
1118                 
1119                 //parent do your part
1120                 num = driverCreateFilter(F, filename, lines[0]);
1121                 
1122                 //force parent to wait until all the processes are done
1123                 for (int i=0;i<(processors-1);i++) { 
1124                         int temp = processIDS[i];
1125                         wait(&temp);
1126                 }
1127                 
1128                 //parent reads in and combines Filter info
1129                 for (int i = 0; i < processIDS.size(); i++) {
1130                         string tempFilename = filename + toString(processIDS[i]) + "filterValues.temp";
1131                         ifstream in;
1132                         m->openInputFile(tempFilename, in);
1133                         
1134                         int temp, tempNum;
1135                         string tempFilterString;
1136
1137                         in >> tempNum; m->gobble(in); num += tempNum;
1138
1139                         in >> tempFilterString;
1140                         F.mergeFilter(tempFilterString);
1141
1142                         for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.a[k] += temp; }           m->gobble(in);
1143                         for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.t[k] += temp; }           m->gobble(in);
1144                         for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.g[k] += temp; }           m->gobble(in);
1145                         for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.c[k] += temp; }           m->gobble(in);
1146                         for (int k = 0; k < alignmentLength; k++) {             in >> temp; F.gap[k] += temp; } m->gobble(in);
1147                                 
1148                         in.close();
1149                         m->mothurRemove(tempFilename);
1150                 }
1151                 
1152                 
1153 #else
1154         
1155         //////////////////////////////////////////////////////////////////////////////////////////////////////
1156                 //Windows version shared memory, so be careful when passing variables through the filterData struct. 
1157                 //Above fork() will clone, so memory is separate, but that's not the case with windows, 
1158                 //Taking advantage of shared memory to allow both threads to add info to F.
1159                 //////////////////////////////////////////////////////////////////////////////////////////////////////
1160                 
1161                 vector<filterData*> pDataArray; 
1162                 DWORD   dwThreadIdArray[processors];
1163                 HANDLE  hThreadArray[processors]; 
1164                 
1165                 //Create processor worker threads.
1166                 for( int i=0; i<processors; i++ ){
1167                         
1168                         filterData* tempFilter = new filterData(filename, m, lines[i]->start, lines[i]->end, alignmentLength, trump, vertical, soft, hard, i);
1169                         pDataArray.push_back(tempFilter);
1170                         processIDS.push_back(i);
1171             
1172                         hThreadArray[i] = CreateThread(NULL, 0, MyCreateFilterThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]);   
1173                 }
1174         
1175                 //Wait until all threads have terminated.
1176                 WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE);
1177                 
1178                 //Close all thread handles and free memory allocations.
1179                 for(int i=0; i < pDataArray.size(); i++){
1180                         num += pDataArray[i]->count;
1181             F.mergeFilter(pDataArray[i]->F.getFilter());
1182             
1183                         for (int k = 0; k < alignmentLength; k++) {      F.a[k] += pDataArray[i]->F.a[k];       }
1184                         for (int k = 0; k < alignmentLength; k++) {      F.t[k] += pDataArray[i]->F.t[k];       }
1185                         for (int k = 0; k < alignmentLength; k++) {      F.g[k] += pDataArray[i]->F.g[k];       }
1186                         for (int k = 0; k < alignmentLength; k++) {      F.c[k] += pDataArray[i]->F.c[k];       }
1187                         for (int k = 0; k < alignmentLength; k++) {      F.gap[k] += pDataArray[i]->F.gap[k];   }
1188
1189                         CloseHandle(hThreadArray[i]);
1190                         delete pDataArray[i];
1191                 }
1192                 
1193 #endif  
1194         return num;
1195         
1196         }
1197         catch(exception& e) {
1198                 m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter");
1199                 exit(1);
1200         }
1201 }
1202 /**************************************************************************************/