]> git.donarmstrong.com Git - mothur.git/blob - filterseqscommand.cpp
326be01ff722fda90a61e0dd887f8d3e57107be4
[mothur.git] / filterseqscommand.cpp
1 /*
2  *  filterseqscommand.cpp
3  *  Mothur
4  *
5  *  Created by Thomas Ryabin on 5/4/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "filterseqscommand.h"
11 #include "sequence.hpp"
12
13 /**************************************************************************************/
14
15 FilterSeqsCommand::FilterSeqsCommand(string option)  {
16         try {
17                 abort = false;
18                 filterFileName = "";
19                 
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"fasta", "trump", "soft", "hard", "vertical", "outputdir","inputdir", "processors"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string,string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string,string>::iterator it;
33                         
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                         
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("fasta");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
50                                 }
51                                 
52                                 it = parameters.find("hard");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["hard"] = inputDir + it->second;             }
58                                 }
59                         }
60                         
61                         //check for required parameters
62                         fasta = validParameter.validFile(parameters, "fasta", false);
63                         if (fasta == "not found") { m->mothurOut("fasta is a required parameter for the filter.seqs command."); m->mothurOutEndLine(); abort = true;  }
64                         else { 
65                                 splitAtDash(fasta, fastafileNames);
66                                 
67                                 //go through files and make sure they are good, if not, then disregard them
68                                 for (int i = 0; i < fastafileNames.size(); i++) {
69                                         if (inputDir != "") {
70                                                 string path = hasPath(fastafileNames[i]);
71                                                 //if the user has not given a path then, add inputdir. else leave path alone.
72                                                 if (path == "") {       fastafileNames[i] = inputDir + fastafileNames[i];               }
73                                         }
74
75                                         int ableToOpen;
76                                         ifstream in;
77                                         ableToOpen = openInputFile(fastafileNames[i], in);
78                                         if (ableToOpen == 1) { 
79                                                 m->mothurOut(fastafileNames[i] + " will be disregarded."); m->mothurOutEndLine(); 
80                                                 //erase from file list
81                                                 fastafileNames.erase(fastafileNames.begin()+i);
82                                                 i--;
83                                         }else{  
84                                                 string simpleName = getSimpleName(fastafileNames[i]);
85                                                 filterFileName += simpleName.substr(0, simpleName.find_first_of('.'));
86                                         }
87                                         in.close();
88                                 }
89                                 
90                                 //make sure there is at least one valid file left
91                                 if (fastafileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
92                         }
93                         
94                         if (!abort) {
95                                 //if the user changes the output directory command factory will send this info to us in the output parameter 
96                                 outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  
97                                         outputDir = ""; 
98                                         outputDir += hasPath(fastafileNames[0]); //if user entered a file with a path then preserve it  
99                                 }
100                         }
101                         //check for optional parameter and set defaults
102                         // ...at some point should added some additional type checking...
103                         
104                         string temp;
105                         temp = validParameter.validFile(parameters, "trump", false);                    if (temp == "not found") { temp = "*"; }
106                         trump = temp[0];
107                         
108                         temp = validParameter.validFile(parameters, "soft", false);                             if (temp == "not found") { soft = 0; }
109                         else {  soft = (float)atoi(temp.c_str()) / 100.0;  }
110                         
111                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = "1";                             }
112                         convert(temp, processors); 
113                         
114                         hard = validParameter.validFile(parameters, "hard", true);                              if (hard == "not found") { hard = ""; }
115                         else if (hard == "not open") { abort = true; }  
116                         
117                         vertical = validParameter.validFile(parameters, "vertical", false);             if (vertical == "not found") { vertical = "T"; }
118                         
119                         numSeqs = 0;
120                         
121                 }
122                 
123         }
124         catch(exception& e) {
125                 m->errorOut(e, "FilterSeqsCommand", "FilterSeqsCommand");
126                 exit(1);
127         }
128 }
129
130 //**********************************************************************************************************************
131
132 void FilterSeqsCommand::help(){
133         try {
134                 #ifdef USE_MPI
135                                 int pid;
136                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
137                                         
138                                 if (pid == 0) {
139                 #endif
140                 
141                 m->mothurOut("The filter.seqs command reads a file containing sequences and creates a .filter and .filter.fasta file.\n");
142                 m->mothurOut("The filter.seqs command parameters are fasta, trump, soft, hard and vertical. \n");
143                 m->mothurOut("The fasta parameter is required. You may enter several fasta files to build the filter from and filter, by separating their names with -'s.\n");
144                 m->mothurOut("For example: fasta=abrecovery.fasta-amazon.fasta \n");
145                 m->mothurOut("The trump parameter .... The default is ...\n");
146                 m->mothurOut("The soft parameter .... The default is ....\n");
147                 m->mothurOut("The hard parameter .... The default is ....\n");
148                 m->mothurOut("The vertical parameter .... The default is T.\n");
149                 m->mothurOut("The filter.seqs command should be in the following format: \n");
150                 m->mothurOut("filter.seqs(fasta=yourFastaFile, trump=yourTrump, soft=yourSoft, hard=yourHard, vertical=yourVertical) \n");
151                 m->mothurOut("Example filter.seqs(fasta=abrecovery.fasta, trump=..., soft=..., hard=..., vertical=T).\n");
152                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n");
153                 
154                 #ifdef USE_MPI
155                         }
156                 #endif
157                 
158         }
159         catch(exception& e) {
160                 m->errorOut(e, "FilterSeqsCommand", "help");
161                 exit(1);
162         }
163 }
164
165 /**************************************************************************************/
166
167 int FilterSeqsCommand::execute() {      
168         try {
169         
170                 if (abort == true) { return 0; }
171                 vector<string> outputNames;
172                 
173                 ifstream inFASTA;
174                 openInputFile(fastafileNames[0], inFASTA);
175                 
176                 Sequence testSeq(inFASTA);
177                 alignmentLength = testSeq.getAlignLength();
178                 inFASTA.close();
179                 
180                 ////////////create filter/////////////////
181                 
182                 filter = createFilter();
183                 
184                 ofstream outFilter;
185                 
186                 string filterFile = outputDir + filterFileName + ".filter";
187                 openOutputFile(filterFile, outFilter);
188                 outFilter << filter << endl;
189                 outFilter.close();
190                 outputNames.push_back(filterFile);
191                 
192                 
193                 ////////////run filter/////////////////
194                 
195                 numSeqs = 0;
196                 for (int i = 0; i < fastafileNames.size(); i++) {
197                         ifstream in;
198                         openInputFile(fastafileNames[i], in);
199                         string filteredFasta = outputDir + getRootName(getSimpleName(fastafileNames[i])) + "filter.fasta";
200                         ofstream outFASTA;
201                         openOutputFile(filteredFasta, outFASTA);
202                         outputNames.push_back(filteredFasta);
203                         
204                         
205                         while(!in.eof()){
206                                 if (m->control_pressed) { in.close(); outFASTA.close(); for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
207                                 
208                                 Sequence seq(in);
209                                 if (seq.getName() != "") {
210                                         string align = seq.getAligned();
211                                         string filterSeq = "";
212                                         
213                                         for(int j=0;j<alignmentLength;j++){
214                                                 if(filter[j] == '1'){
215                                                         filterSeq += align[j];
216                                                 }
217                                         }
218                                         
219                                         outFASTA << '>' << seq.getName() << endl << filterSeq << endl;
220                                         numSeqs++;
221                                 }
222                                 gobble(in);
223                         }
224                         outFASTA.close();
225                         in.close();
226                 }
227                 
228                 int filteredLength = 0;
229                 for(int i=0;i<alignmentLength;i++){
230                         if(filter[i] == '1'){   filteredLength++;       }
231                 }
232                 
233                 if (m->control_pressed) {  for(int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); }  return 0; }
234
235                 #ifdef USE_MPI
236                                 int pid;
237                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
238                                         
239                                 if (pid == 0) {
240                 #endif
241
242                 m->mothurOutEndLine();
243                 m->mothurOut("Length of filtered alignment: " + toString(filteredLength)); m->mothurOutEndLine();
244                 m->mothurOut("Number of columns removed: " + toString((alignmentLength-filteredLength))); m->mothurOutEndLine();
245                 m->mothurOut("Length of the original alignment: " + toString(alignmentLength)); m->mothurOutEndLine();
246                 m->mothurOut("Number of sequences used to construct filter: " + toString(numSeqs)); m->mothurOutEndLine();
247                 
248                 
249                 m->mothurOutEndLine();
250                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
251                 for(int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();      }
252                 m->mothurOutEndLine();
253                 
254                 #ifdef USE_MPI
255                         }
256                 #endif
257
258                 return 0;
259                 
260         }
261         catch(exception& e) {
262                 m->errorOut(e, "FilterSeqsCommand", "execute");
263                 exit(1);
264         }
265 }
266 /**************************************************************************************/
267 string FilterSeqsCommand::createFilter() {      
268         try {
269                 string filterString = "";
270                 
271                 Filters F;
272                 
273                 if (soft != 0)                  {  F.setSoft(soft);             }
274                 if (trump != '*')               {  F.setTrump(trump);   }
275                 
276                 F.setLength(alignmentLength);
277                 
278                 if(soft != 0 || isTrue(vertical)){
279                         F.initialize();
280                 }
281                 
282                 if(hard.compare("") != 0)       {       F.doHard(hard);         }
283                 else                                            {       F.setFilter(string(alignmentLength, '1'));      }
284                 
285                 numSeqs = 0;
286                 
287                 if(trump != '*' || isTrue(vertical) || soft != 0){
288                         for (int s = 0; s < fastafileNames.size(); s++) {
289                         
290                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
291                         
292 #ifdef USE_MPI  
293                                 int pid, rc, ierr; 
294                                 char* buf;
295                                 int Atag = 1; int Ttag = 2; int Ctag = 3; int Gtag = 4; int Gaptag = 5;
296                                 
297                                 MPI_Status status; 
298                                 MPI_File in; 
299                                 rc = MPI_Comm_size(MPI_COMM_WORLD, &processors);
300                                 rc = MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
301                                                 
302                                                         
303                                 char* tempFileName = &(fastafileNames[s][0]);
304                                 MPI_File_open(MPI_COMM_WORLD, tempFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &in);  //comm, filename, mode, info, filepointer
305                                                                 
306                                 if (pid == 0) { //you are the root process
307                                                 setLines(fastafileNames[s]);
308                                                 
309                                                 for (int j = 0; j < lines.size(); j++) { //each process
310                                                         if (j != 0) { //don't send to yourself
311                                                                 MPI_Send(&lines[j]->start, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //start position in file
312                                                                 MPI_Send(&lines[j]->numSeqs, 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how many sequences we are sending
313                                                                 MPI_Send(&bufferSizes[j], 1, MPI_INT, j, 2001, MPI_COMM_WORLD); //how bytes for the read
314                                                         }
315                                                 }
316                                                 cout << "done sending" << endl;
317                                                 cout << "parent = " << pid << " lines = " << lines[pid]->start << '\t' << lines[pid]->numSeqs << " size = " <<  lines.size() << endl;   
318                                                 
319                                                 buf = new char(bufferSizes[0]);
320                         cout << pid << '\t' << bufferSizes[0] << " line 1 start pos = " << lines[1]->start   << " buffer size 0 " << bufferSizes[0] << " buffer size 1 " << bufferSizes[1] << endl;                     
321                                                 MPI_File_read_at(in, 0, buf, bufferSizes[0], MPI_CHAR, &status);
322                                                 
323                 cout << pid << " done reading " << endl;
324                                                 string tempBuf = buf;
325                         cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;                                                                 
326                                                 //do your part
327                                                 MPICreateFilter(F, tempBuf);
328                                                 
329                                                 vector<int> temp; temp.resize(numSeqs);
330                                                 
331                                                 //get the frequencies from the child processes
332                                                 for(int i = 0; i < ((processors-1)*5); i++) { 
333                                 cout << "i = " << i << endl;
334                                                         int ierr = MPI_Recv(&temp, numSeqs, MPI_INT, MPI_ANY_SOURCE, 2001, MPI_COMM_WORLD, &status); 
335                                                         
336                                                         int receiveTag = temp[temp.size()-1];  //child process added a int to the end to indicate what letter count this is for
337                                                         
338                                                         int sender = status.MPI_SOURCE; 
339                                                         
340                                                         if (receiveTag == Atag) { //you are recieveing the A frequencies
341                                                                 for (int k = 0; k < alignmentLength; k++) {             F.a[k] += temp[k];      }
342                                                         }else if (receiveTag == Ttag) { //you are recieveing the T frequencies
343                                                                 for (int k = 0; k < alignmentLength; k++) {             F.t[k] += temp[k];      }
344                                                         }else if (receiveTag == Ctag) { //you are recieveing the C frequencies
345                                                                 for (int k = 0; k < alignmentLength; k++) {             F.c[k] += temp[k];      }
346                                                         }else if (receiveTag == Gtag) { //you are recieveing the G frequencies
347                                                                 for (int k = 0; k < alignmentLength; k++) {             F.g[k] += temp[k];      }
348                                                         }else if (receiveTag == Gaptag) { //you are recieveing the gap frequencies
349                                                                 for (int k = 0; k < alignmentLength; k++) {             F.gap[k] += temp[k];    }
350                                                         }
351                                                         
352                                                         m->mothurOut("receive tag = " + toString(receiveTag) + " " + toString(sender) + " is complete."); m->mothurOutEndLine();
353                                                 } 
354
355                                                 
356                                 }else { //i am the child process
357                                         int startPos, numLines, bufferSize;
358                                 cout << "child = " << pid << endl;
359                                         ierr = MPI_Recv(&startPos, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
360                                         ierr = MPI_Recv(&numLines, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
361                                         ierr = MPI_Recv(&bufferSize, 1, MPI_INT, 0, 2001, MPI_COMM_WORLD, &status);
362                                 cout << "child = " << pid << " done recv messages startpos = " << startPos << " numLines = " << numLines << " buffersize = " << bufferSize << endl;     
363                                 
364                                         
365                                         //send freqs
366                                         char* buf2 = new char(bufferSize);
367                                         MPI_File_read_at( in, startPos, buf2, bufferSize, MPI_CHAR, &status);
368                                 cout << pid << " done reading " << endl;
369                                         
370                                         string tempBuf = buf2;
371                         cout << pid << '\t' << (tempBuf.substr(0, 10)) << endl;
372                                         MPICreateFilter(F, tempBuf);
373                                 
374                                         //send my fequency counts
375                                         F.a.push_back(Atag);
376                                         int ierr = MPI_Send( &F.a[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
377                                         F.t.push_back(Ttag);
378                                         ierr = MPI_Send( &F.t[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
379                                         F.c.push_back(Ctag);
380                                         ierr = MPI_Send( &F.c[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
381                                         F.g.push_back(Gtag);
382                                         ierr = MPI_Send( &F.g[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
383                                         F.gap.push_back(Gaptag);
384                                         ierr = MPI_Send( &F.gap[0], alignmentLength, MPI_INT, 0, 2001, MPI_COMM_WORLD);
385                                         
386                                         cout << "child " << pid << " done sending counts" << endl;
387                                 }
388                                 
389                                 MPI_Barrier(MPI_COMM_WORLD);
390                                 
391 #else
392                 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
393                                 if(processors == 1){
394                                         ifstream inFASTA;
395                                         openInputFile(fastafileNames[s], inFASTA);
396                                         int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
397                                         inFASTA.close();
398                                         
399                                         numSeqs += numFastaSeqs;
400                                         
401                                         lines.push_back(new linePair(0, numFastaSeqs));
402                                         
403                                         driverCreateFilter(F, fastafileNames[s], lines[0]);
404                                 }else{
405                                         
406                                         setLines(fastafileNames[s]);                                    
407                                         createProcessesCreateFilter(F, fastafileNames[s]); 
408                                 }
409                 #else
410                                 ifstream inFASTA;
411                                 openInputFile(fastafileNames[s], inFASTA);
412                                 int numFastaSeqs=count(istreambuf_iterator<char>(inFASTA),istreambuf_iterator<char>(), '>');
413                                 inFASTA.close();
414                                 
415                                 numSeqs += numFastaSeqs;
416                                 
417                                 lines.push_back(new linePair(0, numFastaSeqs));
418                                 
419                                 driverCreateFilter(F, lines[0], fastafileNames[s]);
420                 #endif
421 #endif
422                         
423                         }
424                 }
425
426 #ifdef USE_MPI
427
428 //merge all frequency data and create filter string
429                                         //int pid;
430                                         //MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
431                                         
432                                         //if (pid == 0) { //only one process should output to screen
433 #endif
434
435         cout << "made it here" << endl; 
436                 F.setNumSeqs(numSeqs);
437                                 
438                 if(isTrue(vertical) == 1)       {       F.doVertical(); }
439                 if(soft != 0)                           {       F.doSoft();             }
440 //cout << "Filter String = " << F.getFilter() << endl;                  
441                 filterString = F.getFilter();
442
443                 return filterString;
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "FilterSeqsCommand", "createFilter");
447                 exit(1);
448         }
449 }
450 /**************************************************************************************/
451 int FilterSeqsCommand::driverCreateFilter(Filters& F, string filename, linePair* line) {        
452         try {
453                 
454                 ifstream in;
455                 openInputFile(filename, in);
456                                 
457                 in.seekg(line->start);
458                 
459                 for(int i=0;i<line->numSeqs;i++){
460                                 
461                         if (m->control_pressed) { in.close(); return 1; }
462                                         
463                         Sequence seq(in);
464                         if (seq.getName() != "") {
465                                         if(trump != '*'){       F.doTrump(seq); }
466                                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
467                                         cout.flush();
468                         }
469                         
470                         //report progress
471                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
472                 }
473                 
474                 //report progress
475                 if((line->numSeqs) % 100 != 0){ m->mothurOut(toString(line->numSeqs)); m->mothurOutEndLine();           }
476                 
477                 in.close();
478                 
479                 return 0;
480         }
481         catch(exception& e) {
482                 m->errorOut(e, "FilterSeqsCommand", "driverCreateFilter");
483                 exit(1);
484         }
485 }
486 /**************************************************************************************/
487 int FilterSeqsCommand::MPICreateFilter(Filters& F, string temp) {       
488         try {
489                 
490                 vector<string> seqStrings;
491                 parseBuffer(temp, seqStrings);
492                 
493                 for(int i=0;i<seqStrings.size();i++){
494                                 
495                         if (m->control_pressed) { return 1; }
496                         
497                         Sequence seq("", seqStrings[0]);
498                         
499                         if(trump != '*'){       F.doTrump(seq); }
500                         if(isTrue(vertical) || soft != 0){      F.getFreqs(seq);        }
501                         cout.flush();
502                                                 
503                         //report progress
504                         if((i+1) % 100 == 0){   m->mothurOut(toString(i+1)); m->mothurOutEndLine();             }
505                 }
506                 
507                 //report progress
508                 if((seqStrings.size()) % 100 != 0){     m->mothurOut(toString(seqStrings.size())); m->mothurOutEndLine();               }
509                 
510                 return 0;
511         }
512         catch(exception& e) {
513                 m->errorOut(e, "FilterSeqsCommand", "MPICreateFilter");
514                 exit(1);
515         }
516 }
517
518 /**************************************************************************************************/
519
520 int FilterSeqsCommand::createProcessesCreateFilter(Filters& F, string filename) {
521         try {
522 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
523                 int process = 0;
524                 int exitCommand = 1;
525                 vector<int> processIDS;
526                 
527                 //loop through and create all the processes you want
528                 while (process != processors) {
529                         int pid = vfork();
530                         
531                         if (pid > 0) {
532                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
533                                 process++;
534                         }else if (pid == 0){
535                                 driverCreateFilter(F, filename, lines[process]);
536                                 exit(0);
537                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
538                 }
539                 
540                 //force parent to wait until all the processes are done
541                 for (int i=0;i<processors;i++) { 
542                         int temp = processIDS[i];
543                         wait(&temp);
544                 }
545                 
546                 return exitCommand;
547 #endif          
548         }
549         catch(exception& e) {
550                 m->errorOut(e, "FilterSeqsCommand", "createProcessesCreateFilter");
551                 exit(1);
552         }
553 }
554 /**************************************************************************************************/
555
556 int FilterSeqsCommand::setLines(string filename) {
557         try {
558                 vector<int> positions;
559                 map<int, int> buf;
560                 bufferSizes.clear();
561                 
562                 int pid;
563                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); 
564         
565                 ifstream inFASTA;
566                 openInputFile(filename, inFASTA);
567                         
568                 string input;
569                 int numbuf = 0;
570                 while(!inFASTA.eof()){  
571                         input = getline(inFASTA);
572
573                         if (input.length() != 0) {
574                                 numbuf += input.length();
575                                 if(input[0] == '>'){    long int pos = inFASTA.tellg(); positions.push_back(pos - input.length() - 1);  buf[(pos - input.length() - 1)] = numbuf; }
576                         }
577                 }
578
579                 inFASTA.close();
580                 
581                 int numFastaSeqs = positions.size();
582                 
583                 numSeqs += numFastaSeqs;
584                 
585                 int numSeqsPerProcessor = numFastaSeqs / processors;
586                 
587                 for (int i = 0; i < processors; i++) {
588
589                         long int startPos = positions[ i * numSeqsPerProcessor ];
590                         if(i == processors - 1){
591                                 numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor;
592                                 bufferSizes.push_back(numbuf-buf[startPos]);
593                         }else{  
594                                 int myEnd = positions[ (i+1) * numSeqsPerProcessor ];
595                                 bufferSizes.push_back(buf[myEnd]-buf[startPos]);
596                         }
597                         lines.push_back(new linePair(startPos, numSeqsPerProcessor));
598                 }
599                 
600                 return 0;
601         }
602         catch(exception& e) {
603                 m->errorOut(e, "FilterSeqsCommand", "setLines");
604                 exit(1);
605         }
606 }
607 /**************************************************************************************************/
608 int FilterSeqsCommand::parseBuffer(string file, vector<string>& seqs) {
609         try {
610                 
611                 istringstream iss (file,istringstream::in);
612                 string name, seqstring;
613                 
614                 while (iss) {
615                         
616                         if (m->control_pressed) { return 0; }
617                 cout << "here" << endl;                 
618                         Sequence seq(iss); 
619         cout << "here1" << endl;                        
620                         gobble(iss);
621         cout << seq.getName() << endl;          
622                         if (seq.getName() != "") {
623                                 seqs.push_back(seq.getAligned());       
624                         }
625                 }
626                 
627                 return 0;
628         }
629         catch(exception& e) {
630                 m->errorOut(e, "FilterSeqsCommand", "parseBuffer");
631                 exit(1);
632         }
633 }
634 /**************************************************************************************/