]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
b402a5bac5cc8cf8ad6a2ac4b4ab882fc91e306b
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************/
14
15 FilterSeqsCommand::FilterSeqsCommand(string option)  {
16         try {
17                 abort = false;
18                 filterFileName = "";
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir", "processors"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string,string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("fasta");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
50                                 }
51                                 
52                                 it = parameters.find("hard");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
58                                 }
59                         }
60                         
61                         //check for required parameters
62                         fasta = validParameter.validFile(parameters, "fasta", false);
63                         if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true;  }
64                         else { 
65                                 splitAtDash(fasta, fastafileNames);
66                                 
67                                 //go through files and make sure they are good, if not, then disregard them
68                                 for (int i = 0; i < fastafileNames.size(); i++) {
69                                         if (inputDir != "") {
70                                                 string path = hasPath(fastafileNames[i]);
71                                                 //if the user has not given a path then, add inputdir. else leave path alone.
72                                                 if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
73                                         }
74
75                                         int ableToOpen;
76                                         ifstream in;
77                                         ableToOpen = openInputFile(fastafileNames[i], in);
78                                         if (ableToOpen == 1) { 
79                                                 m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
80                                                 //erase from file list
81                                                 fastafileNames.erase(fastafileNames.begin()+i);
82                                                 i--;
83                                         }else{  
84                                                 string simpleName = getSimpleName(fastafileNames[i]);
85                                                 filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
86                                         }
87                                         in.close();
88                                 }
89                                 
90                                 //make sure there is at least one valid file left
91                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
92                         }
93                         
94                         if (!abort) {
95                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
96                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                         outputDir = ""; 
98                                         outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it  
99                                 }
100                         }
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         
104                         string temp;
105                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
106                         trump = temp[0];
107                         
108                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
109                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
110                         
111                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
112                         convert(temp, processors); 
113                         
114                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
115                         else if (hard == "not open") { abort = true; }  
116                         
117                         vertical = validParameter.validFile(parameters, "vertical", false);             if (vertical == "not found") { vertical = "T"; }
118                         
119                         numSeqs = 0;
120                         
121                 }
122                 
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
126                 exit(1);
127         }
128 }
129
130 //**********************************************************************************************************************
131
132 void FilterSeqsCommand::help(){
133         try {
134                                 
135                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
136                 m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
137                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
138                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
139                 m->mothurOut("The trump parameter .... The default is ...\n");
140                 m->mothurOut("The soft parameter .... The default is ....\n");
141                 m->mothurOut("The hard parameter .... The default is ....\n");
142                 m->mothurOut("The vertical parameter .... The default is T.\n");
143                 m->mothurOut("The filter.seqs command should be in the following format: \n");
144                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n");
145                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
146                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
147                 
148         }
149         catch(exception& e) {
150                 m->errorOut(e, "FilterSeqsCommand", "help");
151                 exit(1);
152         }
153 }
154
155 /**************************************************************************************/
156
157 int FilterSeqsCommand::execute() {      
158         try {
159         
160                 if (abort == true) { return 0; }
161                 
162                 ifstream inFASTA;
163                 openInputFile(fastafileNames[0], inFASTA);
164                 
165                 Sequence testSeq(inFASTA);
166                 alignmentLength = testSeq.getAlignLength();
167                 inFASTA.close();
168                 
169                 ////////////create filter/////////////////
170                 
171                 filter = createFilter();
172                 
173                 if (m->control_pressed) { return 0; }
174                 
175                 #ifdef USE_MPI
176                         int pid;
177                         MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
178                                         
179                         if (pid == 0) { //only one process should output the filter
180                 #endif
181                 
182                 ofstream outFilter;
183                 
184                 string filterFile = outputDir + filterFileName + ".filter";
185                 openOutputFile(filterFile, outFilter);
186                 outFilter << filter << endl;
187                 outFilter.close();
188                 outputNames.push_back(filterFile);
189                 
190                 #ifdef USE_MPI
191                         }
192                 #endif
193                 
194                 ////////////run filter/////////////////
195                 
196                 filterSequences();
197                                                 
198                 int filteredLength = 0;
199                 for(int i=0;i<alignmentLength;i++){
200                         if(filter[i] == '1'){   filteredLength++;       }
201                 }
202                 
203                 if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
204
205                 
206                 m->mothurOutEndLine();
207                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
208                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
209                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
210                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
211                 
212                 
213                 m->mothurOutEndLine();
214                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
215                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
216                 m->mothurOutEndLine();
217                 
218                 return 0;
219                 
220         }
221         catch(exception& e) {
222                 m->errorOut(e, "FilterSeqsCommand", "execute");
223                 exit(1);
224         }
225 }
226 /**************************************************************************************/
227 int FilterSeqsCommand::filterSequences() {      
228         try {
229                 
230                 numSeqs = 0;
231                 
232                 for (int s = 0; s < fastafileNames.size(); s++) {
233                         
234                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
235                                 
236                                 string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[s])) + "filter.fasta";
237 #ifdef USE_MPI  
238                                 int pid, start, end; 
239                                 int tag = 2001;
240                                                 
241                                 MPI_Status status; 
242                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); //set processors to the number of mpi processes running
243                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
244                                 
245                                 MPI_File outMPI;
246                                 MPI_File inMPI;
247                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
248                                 int inMode=MPI_MODE_RDONLY; 
249                                 
250                                 char outFilename[filteredFasta.length()];
251                                 strcpy(outFilename, filteredFasta.c_str());
252                                 
253                                 char inFileName[fastafileNames[s].length()];
254                                 strcpy(inFileName, fastafileNames[s].c_str());
255                                 
256                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
257                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
258                                 
259                                 if (pid == 0) { //you are the root process 
260                                         
261                                         setLines(fastafileNames[s]);
262                                         
263                                         char bufF[alignmentLength];
264                                         strcpy(bufF, filter.c_str()); 
265                                                                 
266                                         for (int j = 0; j < lines.size(); j++) { //each process
267                                                 if (j != 0) { //don't send to yourself
268                                                         MPI_Send(&lines[j]->start, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //start position in file
269                                                         MPI_Send(&bufferSizes[j], 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how bytes for the read
270                                                         MPI_Send(bufF, alignmentLength, MPI_CHAR, j, tag, MPI_COMM_WORLD);
271                                                 }
272                                         }
273                                         
274                                         //read your peice of file
275                                         char buf[bufferSizes[0]];
276                                         MPI_File_read_at(inMPI, lines[0]->start, buf, bufferSizes[0], MPI_CHAR, &status);
277                                         istringstream iss (buf,istringstream::in);
278                                         
279                                         //do your part
280                                         driverMPIRun(iss, outMPI);
281                                         
282                                         //wait on chidren
283                                         for(int i = 1; i < processors; i++) { 
284                                                 char buf[4];
285                                                 MPI_Recv(buf, 4, MPI_CHAR, i, tag, MPI_COMM_WORLD, &status); 
286                                         }
287                                         
288                                 }else { //you are a child process
289                                         //receive your section of file
290                                         int startPos, bufferSize;
291                                         char bufF[alignmentLength];
292                                         MPI_Recv(&startPos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
293                                         MPI_Recv(&bufferSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
294                                         MPI_Recv(bufF, alignmentLength, MPI_CHAR, 0, tag, MPI_COMM_WORLD, &status); 
295                                         
296                                         filter = bufF; //filter was made by process 0 so other processes need to get it
297                                                                 
298                                         //read your peice of file
299                                         char buf2[bufferSize];
300                                         MPI_File_read_at(inMPI, startPos, buf2, bufferSize, MPI_CHAR, &status);
301                                         istringstream iss (buf2,istringstream::in);
302                                         
303                                         //do your part
304                                         driverMPIRun(iss, outMPI);
305                                 
306                                         char buf[4];
307                                         strcpy(buf, "done"); 
308                                         
309                                         //tell parent you are done.
310                                         MPI_Send(buf, 4, MPI_CHAR, 0, tag, MPI_COMM_WORLD);
311                                 }
312                                 
313                                 MPI_File_close(&outMPI);
314                                 MPI_File_close(&inMPI);
315                                 
316 #else
317                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
318                                 if(processors == 1){
319                                         ifstream inFASTA;
320                                         openInputFile(fastafileNames[s], inFASTA);
321                                         int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
322                                         inFASTA.close();
323                                         
324                                         lines.push_back(new linePair(0, numFastaSeqs));
325                                         
326                                         driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
327                                 }else{
328                                         setLines(fastafileNames[s]);
329                                         createProcessesRunFilter(filter, fastafileNames[s]); 
330                                 
331                                         rename((fastafileNames[s] + toString(processIDS[0]) + ".temp").c_str(), filteredFasta.c_str());
332                                 
333                                         //append fasta files
334                                         for(int i=1;i<processors;i++){
335                                                 appendFiles((fastafileNames[s] + toString(processIDS[i]) + ".temp"), filteredFasta);
336                                                 remove((fastafileNames[s] + toString(processIDS[i]) + ".temp").c_str());
337                                         }
338                                 }
339                                 
340                                 if (m->control_pressed) {  return 1; }
341                 #else
342                                 ifstream inFASTA;
343                                 openInputFile(fastafileNames[s], inFASTA);
344                                 int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
345                                 inFASTA.close();
346                                         
347                                 lines.push_back(new linePair(0, numFastaSeqs));
348                                 
349                                 driverRunFilter(filter, filteredFasta, fastafileNames[s], lines[0]);
350
351                                 if (m->control_pressed) {  return 1; }
352                 #endif
353 #endif
354                         outputNames.push_back(filteredFasta);
355                 }
356
357                 return 0;
358         }
359         catch(exception& e) {
360                 m->errorOut(e, "FilterSeqsCommand", "filterSequences");
361                 exit(1);
362         }
363 }
364 /**************************************************************************************/
365 int FilterSeqsCommand::driverMPIRun(istringstream& in, MPI_File& outMPI) {      
366         try {
367                 string outputString = "";
368                 int count = 0;
369                 MPI_Status status; 
370                 
371                 while (!in.eof()) {
372                         
373                         Sequence seq(in); gobble(in);
374                         
375                         if (seq.getName() != "") {
376                                 string align = seq.getAligned();
377                                 string filterSeq = "";
378                                         
379                                 for(int j=0;j<alignmentLength;j++){
380                                         if(filter[j] == '1'){
381                                                 filterSeq += align[j];
382                                         }
383                                 }
384                                 
385                                 count++;
386                                 outputString += ">" + seq.getName() + "\n" + filterSeq + "\n";
387                                 
388                                 if(count % 10 == 0){ //output to file 
389                                         //send results to parent
390                                         int length = outputString.length();
391                                         char buf[length];
392                                         strcpy(buf, outputString.c_str()); 
393                                 
394                                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
395                                         outputString = "";
396                                 }
397
398                         }
399                 }
400                 
401                 if(outputString != ""){ //output to file 
402                         //send results to parent
403                         int length = outputString.length();
404                         char buf[length];
405                         strcpy(buf, outputString.c_str()); 
406                         
407                         MPI_File_write_shared(outMPI, buf, length, MPI_CHAR, &status);
408                         outputString = "";
409                 }
410
411                         
412                 return 0;
413         }
414         catch(exception& e) {
415                 m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
416                 exit(1);
417         }
418 }
419 /**************************************************************************************/
420 int FilterSeqsCommand::driverRunFilter(string F, string outputFilename, string inputFilename, linePair* line) { 
421         try {
422                 ofstream out;
423                 openOutputFile(outputFilename, out);
424                 
425                 ifstream in;
426                 openInputFile(inputFilename, in);
427                                 
428                 in.seekg(line->start);
429                 
430                 for(int i=0;i<line->num;i++){
431                                 
432                                 if (m->control_pressed) { in.close(); out.close(); return 0; }
433                                 
434                                 Sequence seq(in);
435                                 if (seq.getName() != "") {
436                                         string align = seq.getAligned();
437                                         string filterSeq = "";
438                                         
439                                         for(int j=0;j<alignmentLength;j++){
440                                                 if(filter[j] == '1'){
441                                                         filterSeq += align[j];
442                                                 }
443                                         }
444                                         
445                                         out << '>' << seq.getName() << endl << filterSeq << endl;
446                                 }
447                                 gobble(in);
448                 }
449                 out.close();
450                 in.close();
451                 
452                 return 0;
453         }
454         catch(exception& e) {
455                 m->errorOut(e, "FilterSeqsCommand", "driverRunFilter");
456                 exit(1);
457         }
458 }
459 /**************************************************************************************************/
460
461 int FilterSeqsCommand::createProcessesRunFilter(string F, string filename) {
462         try {
463 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
464                 int process = 0;
465                 int exitCommand = 1;
466                 processIDS.clear();
467                 
468                 //loop through and create all the processes you want
469                 while (process != processors) {
470                         int pid = fork();
471                         
472                         if (pid > 0) {
473                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
474                                 process++;
475                         }else if (pid == 0){
476                                 string filteredFasta = filename + toString(getpid()) + ".temp";
477                                 driverRunFilter(F, filteredFasta, filename, lines[process]);
478                                 exit(0);
479                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
480                 }
481                 
482                 //force parent to wait until all the processes are done
483                 for (int i=0;i<processors;i++) { 
484                         int temp = processIDS[i];
485                         wait(&temp);
486                 }
487                 
488                 return exitCommand;
489 #endif          
490         }
491         catch(exception& e) {
492                 m->errorOut(e, "FilterSeqsCommand", "createProcessesRunFilter");
493                 exit(1);
494         }
495 }
496 /**************************************************************************************/
497 string FilterSeqsCommand::createFilter() {      
498         try {
499                 string filterString = "";                       
500                 Filters F;
501                 
502                 if (soft != 0)                  {  F.setSoft(soft);             }
503                 if (trump != '*')               {  F.setTrump(trump);   }
504                 
505                 F.setLength(alignmentLength);
506                 
507                 if(soft != 0 || isTrue(vertical)){
508                         F.initialize();
509                 }
510                 
511                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
512                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
513                 
514                 numSeqs = 0;
515                 if(trump != '*' || isTrue(vertical) || soft != 0){
516                         for (int s = 0; s < fastafileNames.size(); s++) {
517                         
518                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
519                         
520 #ifdef USE_MPI  
521                                 int pid; 
522                                 int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
523                                 int tag = 2001;
524                                 
525                                 MPI_Status status; 
526                                 MPI_File inMPI; 
527                                 MPI_Comm_size(MPI_COMM_WORLD, &processors);
528                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
529                                                         
530                                 char* tempFileName = new char(fastafileNames[s].length());
531                                 tempFileName = &(fastafileNames[s][0]);
532                 
533                                 MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
534                                 
535                                 if (pid == 0) { //you are the root process
536                                                 setLines(fastafileNames[s]);
537                                         
538                                                 for (int j = 0; j < lines.size(); j++) { //each process
539                                                         if (j != 0) { //don't send to yourself
540                                                                 MPI_Send(&lines[j]->start, 1, MPI_INT, j, tag, MPI_COMM_WORLD); //start position in file
541                                                                 MPI_Send(&numSeqs, 1, MPI_INT, j, tag, MPI_COMM_WORLD); 
542                                                                 MPI_Send(&bufferSizes[j], 1, MPI_INT, j, tag, MPI_COMM_WORLD); //how bytes for the read
543                                                         }
544                                                 }
545                         
546                                                 char buf[bufferSizes[0]];
547                                                 MPI_File_read_at(inMPI, 0, buf, bufferSizes[0], MPI_CHAR, &status);
548                         
549                                                 string tempBuf = buf;
550                                                 if (tempBuf.length() > bufferSizes[0]) { tempBuf = tempBuf.substr(0, bufferSizes[0]); }
551
552                                                 MPICreateFilter(F, tempBuf);
553                                                 
554                                                 if (m->control_pressed) { MPI_File_close(&inMPI); return filterString; }
555                                                                                                 
556                                                 vector<int> temp; temp.resize(alignmentLength+1);
557                                                 
558                                                 //get the frequencies from the child processes
559                                                 for(int i = 0; i < ((processors-1)*5); i++) { 
560                                                         MPI_Recv(&temp[0], (alignmentLength+1), MPI_INT, MPI_ANY_SOURCE, tag, MPI_COMM_WORLD, &status); 
561                                                         int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
562                                 
563                                                         if (receiveTag == Atag) { //you are recieveing the A frequencies
564                                                                 for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
565                                                         }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
566                                                                 for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
567                                                         }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
568                                                                 for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
569                                                         }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
570                                                                 for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
571                                                         }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
572                                                                 for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
573                                                         }
574                                                 } 
575
576                                                 
577                                 }else { //i am the child process
578                         
579                                         int startPos, bufferSize;
580                                         MPI_Recv(&startPos, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
581                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
582                                         MPI_Recv(&bufferSize, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
583                                                                 
584                                         //send freqs
585                                         char buf2[bufferSize];
586                                         MPI_File_read_at(inMPI, startPos, buf2, bufferSize, MPI_CHAR, &status);
587                         
588                                         string tempBuf = buf2;
589                                         if (tempBuf.length() > bufferSize) { tempBuf = tempBuf.substr(0, bufferSize); }
590                         
591                                         MPICreateFilter(F, tempBuf);
592                                 
593                                         if (m->control_pressed) { MPI_File_close(&inMPI); return filterString; }
594                                         
595                                         //send my fequency counts
596                                         F.a.push_back(Atag);
597                                         int ierr = MPI_Send(&(F.a[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
598                                         F.t.push_back(Ttag);
599                                         ierr = MPI_Send (&(F.t[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
600                                         F.c.push_back(Ctag);
601                                         ierr = MPI_Send(&(F.c[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
602                                         F.g.push_back(Gtag);
603                                         ierr = MPI_Send(&(F.g[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
604                                         F.gap.push_back(Gaptag);
605                                         ierr = MPI_Send(&(F.gap[0]), (alignmentLength+1), MPI_INT, 0, tag, MPI_COMM_WORLD);
606                                 }
607                                 
608                                 MPI_Barrier(MPI_COMM_WORLD);
609                                 MPI_File_close(&inMPI);
610                                 
611 #else
612                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
613                                 if(processors == 1){
614                                         ifstream inFASTA;
615                                         openInputFile(fastafileNames[s], inFASTA);
616                                         int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
617                                         inFASTA.close();
618                                         
619                                         numSeqs += numFastaSeqs;
620                                         
621                                         lines.push_back(new linePair(0, numFastaSeqs));
622                                         
623                                         driverCreateFilter(F, fastafileNames[s], lines[0]);
624                                 }else{
625                                         setLines(fastafileNames[s]);                                    
626                                         createProcessesCreateFilter(F, fastafileNames[s]); 
627                                 }
628                                 
629                                 if (m->control_pressed) {  return filterString; }
630                 #else
631                                 ifstream inFASTA;
632                                 openInputFile(fastafileNames[s], inFASTA);
633                                 int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
634                                 inFASTA.close();
635                                 
636                                 numSeqs += numFastaSeqs;
637                                 
638                                 lines.push_back(new linePair(0, numFastaSeqs));
639                                 
640                                 driverCreateFilter(F, fastafileNames[s], lines[0]);
641                                 if (m->control_pressed) {  return filterString; }
642                 #endif
643 #endif
644                         
645                         }
646                 }
647
648                 F.setNumSeqs(numSeqs);
649                                 
650                 if(isTrue(vertical) == 1)       {       F.doVertical(); }
651                 if(soft != 0)                           {       F.doSoft();             }
652                         
653                 filterString = F.getFilter();
654
655                 return filterString;
656         }
657         catch(exception& e) {
658                 m->errorOut(e, "FilterSeqsCommand", "createFilter");
659                 exit(1);
660         }
661 }
662 /**************************************************************************************/
663 int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) {        
664         try {
665                 
666                 ifstream in;
667                 openInputFile(filename, in);
668                                 
669                 in.seekg(line->start);
670                 
671                 for(int i=0;i<line->num;i++){
672                                 
673                         if (m->control_pressed) { in.close(); return 1; }
674                                         
675                         Sequence seq(in);
676                         if (seq.getName() != "") {
677                                         if (seq.getAligned().length() != alignmentLength) { m->mothurOut("Sequences are not all the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true;  }
678                                         
679                                         if(trump != '*'){       F.doTrump(seq); }
680                                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
681                                         cout.flush();
682                         }
683                         
684                         //report progress
685                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
686                 }
687                 
688                 //report progress
689                 if((line->num) % 100 != 0){     m->mothurOut(toString(line->num)); m->mothurOutEndLine();               }
690                 
691                 in.close();
692                 
693                 return 0;
694         }
695         catch(exception& e) {
696                 m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
697                 exit(1);
698         }
699 }
700 /**************************************************************************************/
701 int FilterSeqsCommand::MPICreateFilter(Filters& F, string input) {      
702         try {
703                 
704                 vector<string> seqStrings;
705                 parseBuffer(input, seqStrings);
706                 
707                 for(int i=0;i<seqStrings.size();i++){
708                         
709                         if (seqStrings[i].length() != alignmentLength) {  cout << i << '\t' << seqStrings[i].length() << "Sequences are not all the same length, please correct." << endl; m->control_pressed = true;  }
710
711                         if (m->control_pressed) { return 1; }
712                         
713                         Sequence seq("", seqStrings[i]);
714                         
715                         if(trump != '*'){       F.doTrump(seq); }
716                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
717                         cout.flush();
718                                                 
719                         //report progress
720                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
721                 }
722                 
723                 //report progress
724                 if((seqStrings.size()) % 100 != 0){     m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();               }
725                 
726                 return 0;
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
730                 exit(1);
731         }
732 }
733
734 /**************************************************************************************************/
735
736 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
737         try {
738 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
739                 int process = 0;
740                 int exitCommand = 1;
741                 processIDS.clear();
742                 
743                 //loop through and create all the processes you want
744                 while (process != processors) {
745                         int pid = vfork();
746                         
747                         if (pid > 0) {
748                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
749                                 process++;
750                         }else if (pid == 0){
751                                 driverCreateFilter(F, filename, lines[process]);
752                                 exit(0);
753                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
754                 }
755                 
756                 //force parent to wait until all the processes are done
757                 for (int i=0;i<processors;i++) { 
758                         int temp = processIDS[i];
759                         wait(&temp);
760                 }
761                 
762                 return exitCommand;
763 #endif          
764         }
765         catch(exception& e) {
766                 m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter");
767                 exit(1);
768         }
769 }
770 /**************************************************************************************************/
771
772 int FilterSeqsCommand::setLines(string filename) {
773         try {
774                 
775                 vector<long int> positions;
776                 bufferSizes.clear();
777                 
778                 ifstream inFASTA;
779                 openInputFile(filename, inFASTA);
780                         
781                 string input;
782                 while(!inFASTA.eof()){  
783                         input = getline(inFASTA);
784
785                         if (input.length() != 0) {
786                                 if(input[0] == '>'){ long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);     }
787                         }
788                 }
789                 inFASTA.close();
790                 
791                 int numFastaSeqs = positions.size();
792         
793                 FILE * pFile;
794                 long size;
795                 
796                 //get num bytes in file
797                 pFile = fopen (filename.c_str(),"rb");
798                 if (pFile==NULL) perror ("Error opening file");
799                 else{
800                         fseek (pFile, 0, SEEK_END);
801                         size=ftell (pFile);
802                         fclose (pFile);
803                 }
804         
805                 numSeqs += numFastaSeqs;
806                 
807                 int numSeqsPerProcessor = numFastaSeqs / processors;
808                 
809                 for (int i = 0; i < processors; i++) {
810
811                         long int startPos = positions[ i * numSeqsPerProcessor ];
812                         if(i == processors - 1){
813                                 numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
814                                 bufferSizes.push_back(size - startPos);
815                         }else{  
816                                 long int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
817                                 bufferSizes.push_back(myEnd-startPos);
818                         }
819                         lines.push_back(new linePair(startPos, numSeqsPerProcessor));
820                 }
821                 
822                 return 0;
823         }
824         catch(exception& e) {
825                 m->errorOut(e, "FilterSeqsCommand", "setLines");
826                 exit(1);
827         }
828 }
829 /**************************************************************************************************/
830 int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
831         try {   
832                 istringstream iss (file); //,istringstream::in
833                 string name, seqstring;
834
835                 while (!iss.eof()) {
836                         
837                         if (m->control_pressed) { return 0; }
838                                 
839                         Sequence seq(iss); gobble(iss);
840                         
841                         if (seq.getName() != "") {
842                                 seqs.push_back(seq.getAligned());       
843                         }
844                 }
845                 
846                 return 0;
847         }
848         catch(exception& e) {
849                 m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
850                 exit(1);
851         }
852 }
853 /**************************************************************************************/