]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
aa38e7affa36e65ada9463813bfce76438c382fe
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************/
14
15 FilterSeqsCommand::FilterSeqsCommand(string option)  {
16         try {
17                 abort = false;
18                 filterFileName = "";
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir", "processors"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string,string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("fasta");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
50                                 }
51                                 
52                                 it = parameters.find("hard");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
58                                 }
59                         }
60                         
61                         //check for required parameters
62                         fasta = validParameter.validFile(parameters, "fasta", false);
63                         if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true;  }
64                         else { 
65                                 splitAtDash(fasta, fastafileNames);
66                                 
67                                 //go through files and make sure they are good, if not, then disregard them
68                                 for (int i = 0; i < fastafileNames.size(); i++) {
69                                         if (inputDir != "") {
70                                                 string path = hasPath(fastafileNames[i]);
71                                                 //if the user has not given a path then, add inputdir. else leave path alone.
72                                                 if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
73                                         }
74
75                                         int ableToOpen;
76                                         ifstream in;
77                                         ableToOpen = openInputFile(fastafileNames[i], in);
78                                         if (ableToOpen == 1) { 
79                                                 m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
80                                                 //erase from file list
81                                                 fastafileNames.erase(fastafileNames.begin()+i);
82                                                 i--;
83                                         }else{  
84                                                 string simpleName = getSimpleName(fastafileNames[i]);
85                                                 filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
86                                         }
87                                         in.close();
88                                 }
89                                 
90                                 //make sure there is at least one valid file left
91                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
92                         }
93                         
94                         if (!abort) {
95                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
96                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                         outputDir = ""; 
98                                         outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it  
99                                 }
100                         }
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         
104                         string temp;
105                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
106                         trump = temp[0];
107                         
108                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
109                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
110                         
111                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
112                         convert(temp, processors); 
113                         
114                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
115                         else if (hard == "not open") { abort = true; }  
116                         
117                         vertical = validParameter.validFile(parameters, "vertical", false);             if (vertical == "not found") { vertical = "T"; }
118                         
119                         numSeqs = 0;
120                         
121                 }
122                 
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
126                 exit(1);
127         }
128 }
129
130 //**********************************************************************************************************************
131
132 void FilterSeqsCommand::help(){
133         try {
134                                 
135                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
136                 m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
137                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
138                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
139                 m->mothurOut("The trump parameter .... The default is ...\n");
140                 m->mothurOut("The soft parameter .... The default is ....\n");
141                 m->mothurOut("The hard parameter .... The default is ....\n");
142                 m->mothurOut("The vertical parameter .... The default is T.\n");
143                 m->mothurOut("The filter.seqs command should be in the following format: \n");
144                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n");
145                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
146                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
147                 
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "FilterSeqsCommand", "help");
151                 exit(1);
152         }
153 }
154
155 /**************************************************************************************/
156
157 int FilterSeqsCommand::execute() {      
158         try {
159         
160                 if (abort == true) { return 0; }
161                 vector<string> outputNames;
162                 
163                 ifstream inFASTA;
164                 openInputFile(fastafileNames[0], inFASTA);
165                 
166                 Sequence testSeq(inFASTA);
167                 alignmentLength = testSeq.getAlignLength();
168                 inFASTA.close();
169                 
170                 ////////////create filter/////////////////
171                 
172                 filter = createFilter();
173                 
174                 ofstream outFilter;
175                 
176                 string filterFile = outputDir + filterFileName + ".filter";
177                 openOutputFile(filterFile, outFilter);
178                 outFilter << filter << endl;
179                 outFilter.close();
180                 outputNames.push_back(filterFile);
181                 
182                 
183                 ////////////run filter/////////////////
184                 
185                 numSeqs = 0;
186                 for (int i = 0; i < fastafileNames.size(); i++) {
187                         ifstream in;
188                         openInputFile(fastafileNames[i], in);
189                         string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[i])) + "filter.fasta";
190                         ofstream outFASTA;
191                         openOutputFile(filteredFasta, outFASTA);
192                         outputNames.push_back(filteredFasta);
193                         
194                         
195                         while(!in.eof()){
196                                 if (m->control_pressed) { in.close(); outFASTA.close(); for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
197                                 
198                                 Sequence seq(in);
199                                 if (seq.getName() != "") {
200                                         string align = seq.getAligned();
201                                         string filterSeq = "";
202                                         
203                                         for(int j=0;j<alignmentLength;j++){
204                                                 if(filter[j] == '1'){
205                                                         filterSeq += align[j];
206                                                 }
207                                         }
208                                         
209                                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
210                                         numSeqs++;
211                                 }
212                                 gobble(in);
213                         }
214                         outFASTA.close();
215                         in.close();
216                 }
217                 
218                 int filteredLength = 0;
219                 for(int i=0;i<alignmentLength;i++){
220                         if(filter[i] == '1'){   filteredLength++;       }
221                 }
222                 
223                 if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
224
225                 
226                 m->mothurOutEndLine();
227                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
228                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
229                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
230                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
231                 
232                 
233                 m->mothurOutEndLine();
234                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
235                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
236                 m->mothurOutEndLine();
237                 
238                 return 0;
239                 
240         }
241         catch(exception& e) {
242                 m->errorOut(e, "FilterSeqsCommand", "execute");
243                 exit(1);
244         }
245 }
246 /**************************************************************************************/
247 string FilterSeqsCommand::createFilter() {      
248         try {
249                 string filterString = "";
250                 
251                 Filters F;
252                 
253                 if (soft != 0)                  {  F.setSoft(soft);             }
254                 if (trump != '*')               {  F.setTrump(trump);   }
255                 
256                 F.setLength(alignmentLength);
257                 
258                 if(soft != 0 || isTrue(vertical)){
259                         F.initialize();
260                 }
261                 
262                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
263                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
264                 
265                 numSeqs = 0;
266                 
267                 if(trump != '*' || isTrue(vertical) || soft != 0){
268                         for (int s = 0; s < fastafileNames.size(); s++) {
269                         
270                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
271                         
272 #ifdef USE_MPI  
273                                 int pid, rc, ierr; 
274                                 int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
275                                 int tag = 2001;
276                                 
277                                 MPI_Status status; 
278                                 MPI_File in; 
279                                 rc = MPI_Comm_size(MPI_COMM_WORLD, &processors);
280                                 rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
281                                                 
282                                                         
283                                 char* tempFileName = new char(fastafileNames[s].length());
284                                 tempFileName = &(fastafileNames[s][0]);
285                                 
286                                 MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in);  //comm, filename, mode, info, filepointer
287                                 
288                                 if (pid == 0) { //you are the root process
289                                                 setLines(fastafileNames[s]);
290                                                 
291                                                 for (int j = 0; j < lines.size(); j++) { //each process
292                                                         if (j != 0) { //don't send to yourself
293                                                                 MPI_Send(&lines[j]->start, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //start position in file
294                                                                 MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how many sequences we are sending
295                                                                 MPI_Send(&bufferSizes[j], 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how bytes for the read
296                                                         }
297                                                 }
298                                                 //cout << "done sending" << endl;
299                                                 //cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " <<  lines.size() << endl; 
300                                                  
301                                 cout << "parent =  " << pid << " address of Filter " << &F << " address of FilterString  " << &filterString << " address of numSeqs = " << &numSeqs << " address of soft = " << &soft << endl;          
302                                 
303                                                 char* buf = new char(bufferSizes[0]);
304                         //cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start   << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl;                   
305                                                 MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status);
306                                                 
307                 cout << pid << " done reading " << &buf <<  endl;
308                                                 string tempBuf = buf;
309                                                 delete buf;
310                         //cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
311                         
312                                                 //parse buffer                                          
313                                                 istringstream iss (tempBuf,istringstream::in);
314                                                 string name, seqstring;
315                                                 vector<string> seqs;
316                                         
317                                                 while (iss) {
318                         
319                                                         if (m->control_pressed) { return filterString; }
320                                                         cout << "here" << endl;                 
321                                                         Sequence seq(iss); 
322                                                         cout << "here1" << endl;                        
323                                                         gobble(iss);
324                                                         cout << seq.getName() << endl;          
325                                                         if (seq.getName() != "") {
326                                                                 seqs.push_back(seq.getAligned());       
327                                                         }
328                                                         
329                                                 }
330                                                 
331                                                 for(int i=0;i<seqs.size();i++){
332                                 
333                                                         if (m->control_pressed) { return filterString; }
334                         
335                                                         Sequence seq("", seqs[i]);
336                         
337                                                         if(trump != '*'){       F.doTrump(seq); }
338                                                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
339                                                         cout.flush();
340                                                 
341                                                         //report progress
342                                                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
343                                                 }
344                 
345                                                 //report progress
346                                                 if((seqs.size()) % 100 != 0){   m->mothurOut(toString(seqs.size())); m->mothurOutEndLine();             }
347
348                                                 //do your part
349                                                 //MPICreateFilter(F, seqs);
350                                                 
351                                                 vector<int> temp; temp.resize(alignmentLength);
352                                                 
353                                                 //get the frequencies from the child processes
354                                                 for(int i = 0; i < ((processors-1)*5); i++) { 
355                                 cout << "i = " << i << endl;
356                                                         int ierr = MPI_Recv(&temp, alignmentLength, MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); 
357                                                         
358                                                         int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
359                                                         
360                                                         int sender = status.MPI_SOURCE; 
361                                                         
362                                                         if (receiveTag == Atag) { //you are recieveing the A frequencies
363                                                                 for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
364                                                         }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
365                                                                 for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
366                                                         }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
367                                                                 for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
368                                                         }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
369                                                                 for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
370                                                         }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
371                                                                 for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
372                                                         }
373                                                         
374                                                         m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine();
375                                                 } 
376
377                                                 
378                                 }else { //i am the child process
379                                         int startPos, numLines, bufferSize;
380                                 cout << "child = " << pid << " address of Filter " << &F << " address of FilterString  " << &filterString << " address of numSeqs = " << &numSeqs << " address of soft = " << &soft<< endl;     
381                                         ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
382                                         ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
383                                         ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
384                                 //cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl;   
385                                 
386                                         
387                                         //send freqs
388                                         char* buf2 = new char(bufferSize);
389                                         MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status);
390                                 cout << pid << " done reading " << &buf2 <<  endl;
391                                         
392                                         string tempBuf = buf2;
393                                         delete buf2;
394                 //      cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
395                                         istringstream iss (tempBuf,istringstream::in);
396                                         
397                                         string name, seqstring;
398                                         vector<string> seqs;
399                                         
400                                         while (iss) {
401                         
402                                                 if (m->control_pressed) { return filterString; }
403                                                 cout << "here" << endl;                 
404                                                 Sequence seq(iss); 
405                                                 cout << "here1" << endl;                        
406                                                 gobble(iss);
407                                                 cout << seq.getName() << endl;  
408                                                         
409                                                 if (seq.getName() != "") {
410                                                         seqs.push_back(seq.getAligned());       
411                                                 }
412                                         }
413
414                                         for(int i=0;i<seqs.size();i++){
415                                 
416                                                 if (m->control_pressed) { return filterString; }
417                         
418                                                 Sequence seq("", seqs[i]);
419                         
420                                                 if(trump != '*'){       F.doTrump(seq); }
421                                                 if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
422                                                 cout.flush();
423                                                 
424                                                 //report progress
425                                                 if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
426                                         }
427                 
428                                         //report progress
429                                         if((seqs.size()) % 100 != 0){   m->mothurOut(toString(seqs.size())); m->mothurOutEndLine();             }
430                 
431                                         //MPICreateFilter(F, seqs);
432                                 
433                                         //send my fequency counts
434                                         F.a.push_back(Atag);
435                                         int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, tag, MPI_COMM_WORLD);
436                                         F.t.push_back(Ttag);
437                                         ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, tag, MPI_COMM_WORLD);
438                                         F.c.push_back(Ctag);
439                                         ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, tag, MPI_COMM_WORLD);
440                                         F.g.push_back(Gtag);
441                                         ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, tag, MPI_COMM_WORLD);
442                                         F.gap.push_back(Gaptag);
443                                         ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, tag, MPI_COMM_WORLD);
444                                         
445                                         cout << "child " << pid << " done sending counts" << endl;
446                                 }
447                                 
448                                 MPI_Barrier(MPI_COMM_WORLD);
449                                 
450 #else
451                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
452                                 if(processors == 1){
453                                         ifstream inFASTA;
454                                         openInputFile(fastafileNames[s], inFASTA);
455                                         int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
456                                         inFASTA.close();
457                                         
458                                         numSeqs += numFastaSeqs;
459                                         
460                                         lines.push_back(new linePair(0, numFastaSeqs));
461                                         
462                                         driverCreateFilter(F, fastafileNames[s], lines[0]);
463                                 }else{
464                                         
465                                         setLines(fastafileNames[s]);                                    
466                                         createProcessesCreateFilter(F, fastafileNames[s]); 
467                                 }
468                 #else
469                                 ifstream inFASTA;
470                                 openInputFile(fastafileNames[s], inFASTA);
471                                 int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
472                                 inFASTA.close();
473                                 
474                                 numSeqs += numFastaSeqs;
475                                 
476                                 lines.push_back(new linePair(0, numFastaSeqs));
477                                 
478                                 driverCreateFilter(F, lines[0], fastafileNames[s]);
479                 #endif
480 #endif
481                         
482                         }
483                 }
484
485 #ifdef USE_MPI
486
487 //merge all frequency data and create filter string
488                                         //int pid;
489                                         //MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
490                                         
491                                         //if (pid == 0) { //only one process should output to screen
492 #endif
493
494         cout << "made it here" << endl; 
495                 F.setNumSeqs(numSeqs);
496                                 
497                 if(isTrue(vertical) == 1)       {       F.doVertical(); }
498                 if(soft != 0)                           {       F.doSoft();             }
499 //cout << "Filter String = " << F.getFilter() << endl;                  
500                 filterString = F.getFilter();
501
502                 return filterString;
503         }
504         catch(exception& e) {
505                 m->errorOut(e, "FilterSeqsCommand", "createFilter");
506                 exit(1);
507         }
508 }
509 /**************************************************************************************/
510 int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) {        
511         try {
512                 
513                 ifstream in;
514                 openInputFile(filename, in);
515                                 
516                 in.seekg(line->start);
517                 
518                 for(int i=0;i<line->numSeqs;i++){
519                                 
520                         if (m->control_pressed) { in.close(); return 1; }
521                                         
522                         Sequence seq(in);
523                         if (seq.getName() != "") {
524                                         if(trump != '*'){       F.doTrump(seq); }
525                                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
526                                         cout.flush();
527                         }
528                         
529                         //report progress
530                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
531                 }
532                 
533                 //report progress
534                 if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine();           }
535                 
536                 in.close();
537                 
538                 return 0;
539         }
540         catch(exception& e) {
541                 m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
542                 exit(1);
543         }
544 }
545 /**************************************************************************************/
546 int FilterSeqsCommand::MPICreateFilter(Filters& F, vector<string>& seqStrings) {        
547         try {
548                 
549                 for(int i=0;i<seqStrings.size();i++){
550                                 
551                         if (m->control_pressed) { return 1; }
552                         
553                         Sequence seq("", seqStrings[0]);
554                         
555                         if(trump != '*'){       F.doTrump(seq); }
556                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
557                         cout.flush();
558                                                 
559                         //report progress
560                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
561                 }
562                 
563                 //report progress
564                 if((seqStrings.size()) % 100 != 0){     m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();               }
565                 
566                 return 0;
567         }
568         catch(exception& e) {
569                 m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
570                 exit(1);
571         }
572 }
573
574 /**************************************************************************************************/
575
576 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
577         try {
578 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
579                 int process = 0;
580                 int exitCommand = 1;
581                 vector<int> processIDS;
582                 
583                 //loop through and create all the processes you want
584                 while (process != processors) {
585                         int pid = vfork();
586                         
587                         if (pid > 0) {
588                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
589                                 process++;
590                         }else if (pid == 0){
591                                 driverCreateFilter(F, filename, lines[process]);
592                                 exit(0);
593                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
594                 }
595                 
596                 //force parent to wait until all the processes are done
597                 for (int i=0;i<processors;i++) { 
598                         int temp = processIDS[i];
599                         wait(&temp);
600                 }
601                 
602                 return exitCommand;
603 #endif          
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter");
607                 exit(1);
608         }
609 }
610 /**************************************************************************************************/
611
612 int FilterSeqsCommand::setLines(string filename) {
613         try {
614                 vector<int> positions;
615                 map<int, int> buf;
616                 bufferSizes.clear();
617                 
618                 int pid;
619                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
620         
621                 ifstream inFASTA;
622                 openInputFile(filename, inFASTA);
623                         
624                 string input;
625                 int numbuf = 0;
626                 while(!inFASTA.eof()){  
627                         input = getline(inFASTA);
628
629                         if (input.length() != 0) {
630                                 numbuf += input.length();
631                                 if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  buf[(pos - input.length() - 1)] = numbuf; }
632                         }
633                 }
634
635                 inFASTA.close();
636                 
637                 int numFastaSeqs = positions.size();
638                 
639                 numSeqs += numFastaSeqs;
640                 
641                 int numSeqsPerProcessor = numFastaSeqs / processors;
642                 
643                 for (int i = 0; i < processors; i++) {
644
645                         long int startPos = positions[ i * numSeqsPerProcessor ];
646                         if(i == processors - 1){
647                                 numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
648                                 bufferSizes.push_back(numbuf-buf[startPos]);
649                         }else{  
650                                 int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
651                                 bufferSizes.push_back(buf[myEnd]-buf[startPos]);
652                         }
653                         lines.push_back(new linePair(startPos, numSeqsPerProcessor));
654                 }
655                 
656                 return 0;
657         }
658         catch(exception& e) {
659                 m->errorOut(e, "FilterSeqsCommand", "setLines");
660                 exit(1);
661         }
662 }
663 /**************************************************************************************************/
664 int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
665         try {
666                 
667                 istringstream iss (file,istringstream::in);
668                 string name, seqstring;
669 int pid;
670 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
671         Sequence* seq34 = new Sequence();       
672 cout << "address of new sequence " << pid << '\t' << seq34 << endl;
673 cout << "address of seqStrings " << pid << '\t' << &seqs << endl;
674         
675                 while (iss) {
676                         
677                         if (m->control_pressed) { return 0; }
678                 cout << "here" << endl;                 
679                         Sequence* seq = new Sequence(iss); 
680         cout << "here1" << endl;                        
681                         gobble(iss);
682         cout << seq->getName() << endl;         
683                         if (seq->getName() != "") {
684                                 seqs.push_back(seq->getAligned());      
685                         }
686                         delete seq;
687                 }
688                 
689                 return 0;
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
693                 exit(1);
694         }
695 }
696 /**************************************************************************************/