]> git.donarmstrong.com Git - mothur.git/blob - chimerapintailcommand.cpp
added parse.fastaq command, added permute option to venn command, fixed bug with...
[mothur.git] / chimerapintailcommand.cpp
1 /*
2  *  chimerapintailcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/1/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimerapintailcommand.h"
11 #include "pintail.h"
12
13 //***************************************************************************************************************
14
15 ChimeraPintailCommand::ChimeraPintailCommand(string option)  {
16         try {
17                 abort = false;
18                 
19                 //allow user to run help
20                 if(option == "help") { help(); abort = true; }
21                 
22                 else {
23                         //valid paramters for this command
24                         string Array[] =  {"fasta","filter","processors","window" ,"increment","template","conservation","quantile","mask","outputdir","inputdir"};
25                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26                         
27                         OptionParser parser(option);
28                         map<string,string> parameters = parser.getParameters();
29                         
30                         ValidParameters validParameter("chimera.pintail");
31                         map<string,string>::iterator it;
32                         
33                         //check to make sure all parameters are valid for command
34                         for (it = parameters.begin(); it != parameters.end(); it++) { 
35                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
36                         }
37                         
38                         //if the user changes the input directory command factory will send this info to us in the output parameter 
39                         inputDir = validParameter.validFile(parameters, "inputdir", false);             
40                         if (inputDir == "not found"){   inputDir = "";          }
41                         else {
42                                 string path;
43                                 it = parameters.find("template");
44                                 //user has given a template file
45                                 if(it != parameters.end()){ 
46                                         path = m->hasPath(it->second);
47                                         //if the user has not given a path then, add inputdir. else leave path alone.
48                                         if (path == "") {       parameters["template"] = inputDir + it->second;         }
49                                 }
50                                 
51                                 it = parameters.find("conservation");
52                                 //user has given a template file
53                                 if(it != parameters.end()){ 
54                                         path = m->hasPath(it->second);
55                                         //if the user has not given a path then, add inputdir. else leave path alone.
56                                         if (path == "") {       parameters["conservation"] = inputDir + it->second;             }
57                                 }
58                                 
59                                 it = parameters.find("quantile");
60                                 //user has given a template file
61                                 if(it != parameters.end()){ 
62                                         path = m->hasPath(it->second);
63                                         //if the user has not given a path then, add inputdir. else leave path alone.
64                                         if (path == "") {       parameters["quantile"] = inputDir + it->second;         }
65                                 }
66                         }
67
68                         
69                         //check for required parameters
70                         fastafile = validParameter.validFile(parameters, "fasta", false);
71                         if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
72                         else { 
73                                 m->splitAtDash(fastafile, fastaFileNames);
74                                 
75                                 //go through files and make sure they are good, if not, then disregard them
76                                 for (int i = 0; i < fastaFileNames.size(); i++) {
77                                         if (inputDir != "") {
78                                                 string path = m->hasPath(fastaFileNames[i]);
79                                                 //if the user has not given a path then, add inputdir. else leave path alone.
80                                                 if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
81                                         }
82         
83                                         int ableToOpen;
84                                         ifstream in;
85                                         
86                                         ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
87                                 
88                                         //if you can't open it, try default location
89                                         if (ableToOpen == 1) {
90                                                 if (m->getDefaultPath() != "") { //default path is set
91                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
92                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
93                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
94                                                         fastaFileNames[i] = tryPath;
95                                                 }
96                                         }
97                                         in.close();
98                                         
99                                         if (ableToOpen == 1) { 
100                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
101                                                 //erase from file list
102                                                 fastaFileNames.erase(fastaFileNames.begin()+i);
103                                                 i--;
104                                         }
105                                 }
106                                 
107                                 //make sure there is at least one valid file left
108                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
109                         }
110                         
111                         string temp;
112                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
113                         filter = m->isTrue(temp);
114                         
115                         temp = validParameter.validFile(parameters, "processors", false);               if (temp == "not found") { temp = "1"; }
116                         convert(temp, processors);
117                         
118                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
119                         convert(temp, window);
120                         
121                         temp = validParameter.validFile(parameters, "increment", false);                if (temp == "not found") { temp = "25"; }
122                         convert(temp, increment);
123                         
124                         maskfile = validParameter.validFile(parameters, "mask", false);
125                         if (maskfile == "not found") { maskfile = "";  }        
126                         else if (maskfile != "default")  { 
127                                 if (inputDir != "") {
128                                         string path = m->hasPath(maskfile);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       maskfile = inputDir + maskfile;         }
131                                 }
132
133                                 ifstream in;
134                                 int     ableToOpen = m->openInputFile(maskfile, in, "no error");
135                                 if (ableToOpen == 1) { 
136                                         if (m->getDefaultPath() != "") { //default path is set
137                                                         string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
138                                                         m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
139                                                         ableToOpen = m->openInputFile(tryPath, in, "noerror");
140                                                         maskfile = tryPath;
141                                         }
142                                 }
143                                         in.close();
144                                         
145                                 if (ableToOpen == 1) { 
146                                                 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine(); 
147                                                 abort = true;
148                                 }
149                         }
150
151                         
152                         //if the user changes the output directory command factory will send this info to us in the output parameter 
153                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
154                 
155                         templatefile = validParameter.validFile(parameters, "template", true);
156                         if (templatefile == "not open") { abort = true; }
157                         else if (templatefile == "not found") { templatefile = "";  m->mothurOut("template is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true;  }
158                         
159                         consfile = validParameter.validFile(parameters, "conservation", true);
160                         if (consfile == "not open") { abort = true; }
161                         else if (consfile == "not found") { 
162                                 consfile = "";  
163                                 //check for consfile
164                                 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
165                                 ifstream FileTest(tempConsFile.c_str());
166                                 if(FileTest){   
167                                         bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
168                                         if (GoodFile) {  
169                                                 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest.close();     
170                                         }
171                                 }else {
172                                         string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
173                                         ifstream FileTest2(tempConsFile.c_str());
174                                         if(FileTest2){  
175                                                 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
176                                                 if (GoodFile) {  
177                                                         m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  consfile = tempConsFile;  FileTest2.close();    
178                                                 }
179                                         }
180                                 }
181                         }       
182                         
183                         quanfile = validParameter.validFile(parameters, "quantile", true);
184                         if (quanfile == "not open") { abort = true; }
185                         else if (quanfile == "not found") { quanfile = ""; }
186                 }
187         }
188         catch(exception& e) {
189                 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
190                 exit(1);
191         }
192 }
193 //**********************************************************************************************************************
194
195 void ChimeraPintailCommand::help(){
196         try {
197         
198                 m->mothurOut("The chimera.pintail command reads a fastafile and templatefile and outputs potentially chimeric sequences.\n");
199                 m->mothurOut("This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n");
200                 m->mothurOut("The chimera.pintail command parameters are fasta, template, filter, mask, processors, window, increment, conservation and quantile.\n");
201                 m->mothurOut("The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required. \n");
202                 m->mothurOut("You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n");
203                 m->mothurOut("The template parameter allows you to enter a template file containing known non-chimeric sequences, and is required. \n");
204                 m->mothurOut("The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n");
205                 m->mothurOut("The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied.  You can apply an ecoli mask by typing, mask=default. \n");
206                 m->mothurOut("The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n");
207                 #ifdef USE_MPI
208                 m->mothurOut("When using MPI, the processors parameter is set to the number of MPI processes running. \n");
209                 #endif
210                 m->mothurOut("The window parameter allows you to specify the window size for searching for chimeras, default=300. \n");
211                 m->mothurOut("The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n");
212                 m->mothurOut("The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n");
213                 m->mothurOut("The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n");
214                 m->mothurOut("The chimera.pintail command should be in the following format: \n");
215                 m->mothurOut("chimera.pintail(fasta=yourFastaFile, template=yourTemplate) \n");
216                 m->mothurOut("Example: chimera.pintail(fasta=AD.align, template=silva.bacteria.fasta) \n");
217                 m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n\n");     
218         }
219         catch(exception& e) {
220                 m->errorOut(e, "ChimeraPintailCommand", "help");
221                 exit(1);
222         }
223 }
224
225 //***************************************************************************************************************
226
227 ChimeraPintailCommand::~ChimeraPintailCommand(){        /*      do nothing      */      }
228
229 //***************************************************************************************************************
230
231 int ChimeraPintailCommand::execute(){
232         try{
233                 
234                 if (abort == true) { return 0; }
235                 
236                 for (int s = 0; s < fastaFileNames.size(); s++) {
237                                 
238                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
239
240                         int start = time(NULL); 
241                         
242                         //set user options
243                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
244                         
245                         //check for quantile to save the time
246                         string tempQuan = "";
247                         if ((!filter) && (maskfile == "")) {
248                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
249                         }else if ((!filter) && (maskfile != "")) { 
250                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
251                         }else if ((filter) && (maskfile != "")) { 
252                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
253                         }else if ((filter) && (maskfile == "")) { 
254                                 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
255                         }
256                         
257                         ifstream FileTest(tempQuan.c_str());
258                         if(FileTest){   
259                                 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
260                                 if (GoodFile) {  
261                                         m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest.close();     
262                                 }
263                         }else {
264                                 string tryPath = m->getDefaultPath();
265                                 string tempQuan = "";
266                                 if ((!filter) && (maskfile == "")) {
267                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
268                                 }else if ((!filter) && (maskfile != "")) { 
269                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
270                                 }else if ((filter) && (maskfile != "")) { 
271                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
272                                 }else if ((filter) && (maskfile == "")) { 
273                                         tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
274                                 }
275                                 
276                                 ifstream FileTest2(tempQuan.c_str());
277                                 if(FileTest2){  
278                                         bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
279                                         if (GoodFile) {  
280                                                 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine();  quanfile = tempQuan;  FileTest2.close();    
281                                         }
282                                 }
283                         }
284                         
285                         chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
286                         
287                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
288                         string outputFileName, accnosFileName;
289                         if (maskfile != "") {
290                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
291                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
292                         }else {
293                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.chimeras";
294                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + "pintail.accnos";
295                         }
296                         
297                         if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); }  return 0;    }
298                         
299                         if (chimera->getUnaligned()) { 
300                                 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); 
301                                 delete chimera;
302                                 return 0; 
303                         }
304                         templateSeqsLength = chimera->getLength();
305                 
306                 #ifdef USE_MPI
307                         int pid, end, numSeqsPerProcessor; 
308                                 int tag = 2001;
309                                 vector<unsigned long int> MPIPos;
310                                 
311                                 MPI_Status status; 
312                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
313                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
314
315                                 MPI_File inMPI;
316                                 MPI_File outMPI;
317                                 MPI_File outMPIAccnos;
318                                 
319                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
320                                 int inMode=MPI_MODE_RDONLY; 
321                                 
322                                 char outFilename[1024];
323                                 strcpy(outFilename, outputFileName.c_str());
324                                 
325                                 char outAccnosFilename[1024];
326                                 strcpy(outAccnosFilename, accnosFileName.c_str());
327                                 
328                                 char inFileName[1024];
329                                 strcpy(inFileName, fastaFileNames[s].c_str());
330
331                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
332                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
333                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
334                                 
335                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
336
337                                 if (pid == 0) { //you are the root process 
338                                                                 
339                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
340                                         
341                                         //send file positions to all processes
342                                         for(int i = 1; i < processors; i++) { 
343                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
344                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
345                                         }
346                                         
347                                         //figure out how many sequences you have to align
348                                         numSeqsPerProcessor = numSeqs / processors;
349                                         int startIndex =  pid * numSeqsPerProcessor;
350                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
351                                 
352                                         //do your part
353                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
354                                         
355                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  remove(outputFileName.c_str());  remove(accnosFileName.c_str());  for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
356                                         
357                                 }else{ //you are a child process
358                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
359                                         MPIPos.resize(numSeqs+1);
360                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
361                                         
362                                         //figure out how many sequences you have to align
363                                         numSeqsPerProcessor = numSeqs / processors;
364                                         int startIndex =  pid * numSeqsPerProcessor;
365                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
366                                         
367                                         //do your part
368                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
369                                         
370                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   remove(outputNames[j].c_str()); }  delete chimera; return 0;  }
371                                 }
372                                 
373                                 //close files 
374                                 MPI_File_close(&inMPI);
375                                 MPI_File_close(&outMPI);
376                                 MPI_File_close(&outMPIAccnos);
377                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
378                 #else
379                         vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
380                                 
381                         for (int i = 0; i < (positions.size()-1); i++) {
382                                 lines.push_back(new linePair(positions[i], positions[(i+1)]));
383                         }       
384                         
385                         //break up file
386                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
387                                 if(processors == 1){
388                 
389                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
390                                         
391                                         if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
392                                         
393                                 }else{
394                                         processIDS.resize(0);
395                                         
396                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
397                                 
398                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
399                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
400                                                 
401                                         //append output files
402                                         for(int i=1;i<processors;i++){
403                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
404                                                 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
405                                         }
406                                         
407                                         //append output files
408                                         for(int i=1;i<processors;i++){
409                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
410                                                 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
411                                         }
412                                                                                 
413                                         if (m->control_pressed) { 
414                                                 remove(outputFileName.c_str()); 
415                                                 remove(accnosFileName.c_str());
416                                                 for (int j = 0; j < outputNames.size(); j++) {  remove(outputNames[j].c_str()); } 
417                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
418                                                 delete chimera;
419                                                 return 0;
420                                         }
421                                 }
422
423                         #else
424                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
425                                 
426                                 if (m->control_pressed) { remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) {        remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear(); delete chimera; return 0; }
427                         #endif
428                         
429                 #endif  
430                 
431                         delete chimera;
432                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
433                         
434                         outputNames.push_back(outputFileName);
435                         outputNames.push_back(accnosFileName); 
436                         
437                         m->mothurOutEndLine();
438                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
439                 }
440                 
441                 m->mothurOutEndLine();
442                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
443                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
444                 m->mothurOutEndLine();
445                         
446                 return 0;
447                 
448         }
449         catch(exception& e) {
450                 m->errorOut(e, "ChimeraPintailCommand", "execute");
451                 exit(1);
452         }
453 }
454 //**********************************************************************************************************************
455
456 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
457         try {
458                 ofstream out;
459                 m->openOutputFile(outputFName, out);
460                 
461                 ofstream out2;
462                 m->openOutputFile(accnos, out2);
463                 
464                 ifstream inFASTA;
465                 m->openInputFile(filename, inFASTA);
466
467                 inFASTA.seekg(filePos->start);
468
469                 bool done = false;
470                 int count = 0;
471         
472                 while (!done) {
473                                 
474                         if (m->control_pressed) {       return 1;       }
475                 
476                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
477                                 
478                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
479                                 
480                                 if (candidateSeq->getAligned().length() != templateSeqsLength)  {  //chimeracheck does not require seqs to be aligned
481                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
482                                 }else{
483                                         //find chimeras
484                                         chimera->getChimeras(candidateSeq);
485                                         
486                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
487                 
488                                         //print results
489                                         chimera->print(out, out2);
490                                 }
491                                 count++;
492                         }
493                         delete candidateSeq;
494                         
495                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
496                                 unsigned long int pos = inFASTA.tellg();
497                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
498                         #else
499                                 if (inFASTA.eof()) { break; }
500                         #endif
501                         
502                         //report progress
503                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
504                 }
505                 //report progress
506                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
507                 
508                 out.close();
509                 out2.close();
510                 inFASTA.close();
511                                 
512                 return count;
513         }
514         catch(exception& e) {
515                 m->errorOut(e, "ChimeraPintailCommand", "driver");
516                 exit(1);
517         }
518 }
519 //**********************************************************************************************************************
520 #ifdef USE_MPI
521 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
522         try {
523                                 
524                 MPI_Status status; 
525                 int pid;
526                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
527                 
528                 for(int i=0;i<num;i++){
529                         
530                         if (m->control_pressed) {       return 1;       }
531                         
532                         //read next sequence
533                         int length = MPIPos[start+i+1] - MPIPos[start+i];
534         
535                         char* buf4 = new char[length];
536                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
537                         
538                         string tempBuf = buf4;
539                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
540                         istringstream iss (tempBuf,istringstream::in);
541                         delete buf4;
542
543                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
544                                 
545                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
546                                 
547                                 if      (candidateSeq->getAligned().length() != templateSeqsLength) {  //chimeracheck does not require seqs to be aligned
548                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
549                                 }else{
550                                         //find chimeras
551                                         chimera->getChimeras(candidateSeq);
552                                         
553                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
554                 
555                                         //print results
556                                         bool isChimeric = chimera->print(outMPI, outAccMPI);
557                                 }
558                         }
559                         delete candidateSeq;
560                         
561                         //report progress
562                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
563                 }
564                 //report progress
565                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
566                 
567                                 
568                 return 0;
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
572                 exit(1);
573         }
574 }
575 #endif
576
577 /**************************************************************************************************/
578
579 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
580         try {
581 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
582                 int process = 0;
583                 int num = 0;
584                 
585                 //loop through and create all the processes you want
586                 while (process != processors) {
587                         int pid = fork();
588                         
589                         if (pid > 0) {
590                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
591                                 process++;
592                         }else if (pid == 0){
593                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
594                                 
595                                 //pass numSeqs to parent
596                                 ofstream out;
597                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
598                                 m->openOutputFile(tempFile, out);
599                                 out << num << endl;
600                                 out.close();
601
602                                 exit(0);
603                         }else { m->mothurOut("unable to spawn the necessary processes."); m->mothurOutEndLine(); exit(0); }
604                 }
605                 
606                 //force parent to wait until all the processes are done
607                 for (int i=0;i<processors;i++) { 
608                         int temp = processIDS[i];
609                         wait(&temp);
610                 }
611                 
612                 for (int i = 0; i < processIDS.size(); i++) {
613                         ifstream in;
614                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
615                         m->openInputFile(tempFile, in);
616                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
617                         in.close(); remove(tempFile.c_str());
618                 }
619                 
620                 return num;
621 #endif          
622         }
623         catch(exception& e) {
624                 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
625                 exit(1);
626         }
627 }
628
629 /**************************************************************************************************/
630
631