]> git.donarmstrong.com Git - mothur.git/blob - chimeraccodecommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / chimeraccodecommand.cpp
1 /*
2  *  chimeraccodecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 3/30/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "chimeraccodecommand.h"
11 #include "ccode.h"
12 #include "referencedb.h"
13 //**********************************************************************************************************************
14 vector<string> ChimeraCcodeCommand::setParameters(){    
15         try {
16                 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18                 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
19                 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
20                 CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "",false,false); parameters.push_back(pnumwanted);
21                 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
22                 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
26                 
27                 vector<string> myArray;
28                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
29                 return myArray;
30         }
31         catch(exception& e) {
32                 m->errorOut(e, "ChimeraCcodeCommand", "setParameters");
33                 exit(1);
34         }
35 }
36 //**********************************************************************************************************************
37 string ChimeraCcodeCommand::getHelpString(){    
38         try {
39                 string helpString = "";
40                 helpString += "The chimera.ccode command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
41                 helpString += "This command was created using the algorythms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
42                 helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n";
43                 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
44                 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
45                 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
46                 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
47                 helpString += "The processors parameter allows you to specify how many processors you would like to use.  The default is 1. \n";
48 #ifdef USE_MPI
49                 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
50 #endif
51                 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
52                 helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
53                 helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
54                 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
55                 helpString += "The chimera.ccode command should be in the following format: \n";
56                 helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
57                 helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
58                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";       
59                 return helpString;
60         }
61         catch(exception& e) {
62                 m->errorOut(e, "ChimeraCcodeCommand", "getHelpString");
63                 exit(1);
64         }
65 }
66 //**********************************************************************************************************************
67 string ChimeraCcodeCommand::getOutputFileNameTag(string type, string inputName=""){     
68         try {
69         string outputFileName = "";
70                 map<string, vector<string> >::iterator it;
71         
72         //is this a type this command creates
73         it = outputTypes.find(type);
74         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
75         else {
76             if (type == "chimera") {  outputFileName =  "ccode.chimeras"; }
77             else if (type == "mapinfo") {  outputFileName =  "mapinfo"; }
78             else if (type == "accnos") {  outputFileName =  "ccode.accnos"; }
79             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
80         }
81         return outputFileName;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "ChimeraCcodeCommand", "getOutputFileNameTag");
85                 exit(1);
86         }
87 }
88 //**********************************************************************************************************************
89 ChimeraCcodeCommand::ChimeraCcodeCommand(){     
90         try {
91                 abort = true; calledHelp = true;
92                 setParameters();
93                 vector<string> tempOutNames;
94                 outputTypes["chimera"] = tempOutNames;
95                 outputTypes["mapinfo"] = tempOutNames;
96                 outputTypes["accnos"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
100                 exit(1);
101         }
102 }
103 //***************************************************************************************************************
104 ChimeraCcodeCommand::ChimeraCcodeCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                 ReferenceDB* rdb = ReferenceDB::getInstance();
108                 
109                 //allow user to run help
110                 if(option == "help") { help(); abort = true; calledHelp = true; }
111                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
112                 
113                 else {
114                         vector<string> myArray = setParameters();
115                         
116                         OptionParser parser(option);
117                         map<string,string> parameters = parser.getParameters();
118                         
119                         ValidParameters validParameter("chimera.ccode");
120                         map<string,string>::iterator it;
121                         
122                         //check to make sure all parameters are valid for command
123                         for (it = parameters.begin(); it != parameters.end(); it++) { 
124                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
125                         }
126                         
127                         vector<string> tempOutNames;
128                         outputTypes["chimera"] = tempOutNames;
129                         outputTypes["mapinfo"] = tempOutNames;
130                         outputTypes["accnos"] = tempOutNames;
131                         
132                         //if the user changes the input directory command factory will send this info to us in the output parameter 
133                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
134                         if (inputDir == "not found"){   inputDir = "";          }
135                         else {
136                                 string path;
137                                 it = parameters.find("reference");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["reference"] = inputDir + it->second;                }
143                                 }
144                         }
145
146                         //check for required parameters
147                         fastafile = validParameter.validFile(parameters, "fasta", false);
148                         if (fastafile == "not found") {                                 //if there is a current fasta file, use it
149                                 string filename = m->getFastaFile(); 
150                                 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
151                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152                         }else { 
153                                 m->splitAtDash(fastafile, fastaFileNames);
154                                 
155                                 //go through files and make sure they are good, if not, then disregard them
156                                 for (int i = 0; i < fastaFileNames.size(); i++) {
157                                         
158                                         bool ignore = false;
159                                         if (fastaFileNames[i] == "current") { 
160                                                 fastaFileNames[i] = m->getFastaFile(); 
161                                                 if (fastaFileNames[i] != "") {  m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162                                                 else {  
163                                                         m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true; 
164                                                         //erase from file list
165                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
166                                                         i--;
167                                                 }
168                                         }
169                                         
170                                         if (!ignore) {
171                                         
172                                                 if (inputDir != "") {
173                                                         string path = m->hasPath(fastaFileNames[i]);
174                                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                                         if (path == "") {       fastaFileNames[i] = inputDir + fastaFileNames[i];               }
176                                                 }
177                 
178                                                 int ableToOpen;
179                                                 ifstream in;
180                                                 
181                                                 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182                                         
183                                                 //if you can't open it, try default location
184                                                 if (ableToOpen == 1) {
185                                                         if (m->getDefaultPath() != "") { //default path is set
186                                                                 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
187                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188                                                                 ifstream in2;
189                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190                                                                 in2.close();
191                                                                 fastaFileNames[i] = tryPath;
192                                                         }
193                                                 }
194                                                 
195                                                 //if you can't open it, try default location
196                                                 if (ableToOpen == 1) {
197                                                         if (m->getOutputDir() != "") { //default path is set
198                                                                 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
199                                                                 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
200                                                                 ifstream in2;
201                                                                 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
202                                                                 in2.close();
203                                                                 fastaFileNames[i] = tryPath;
204                                                         }
205                                                 }
206                                                 
207                                                 in.close();
208                                                 
209                                                 if (ableToOpen == 1) { 
210                                                         m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine(); 
211                                                         //erase from file list
212                                                         fastaFileNames.erase(fastaFileNames.begin()+i);
213                                                         i--;
214                                                 }else {
215                                                         m->setFastaFile(fastaFileNames[i]);
216                                                 }
217                                         }
218                                 }
219                                 
220                                 //make sure there is at least one valid file left
221                                 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
222                         }
223                         
224                         //if the user changes the output directory command factory will send this info to us in the output parameter 
225                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
226                         
227                         maskfile = validParameter.validFile(parameters, "mask", false);
228                         if (maskfile == "not found") { maskfile = "";  }        
229                         else if (maskfile != "default")  { 
230                                 if (inputDir != "") {
231                                         string path = m->hasPath(maskfile);
232                                         //if the user has not given a path then, add inputdir. else leave path alone.
233                                         if (path == "") {       maskfile = inputDir + maskfile;         }
234                                 }
235
236                                 ifstream in;
237                                 int     ableToOpen = m->openInputFile(maskfile, in);
238                                 if (ableToOpen == 1) { abort = true; }
239                                 in.close();
240                         }
241                         
242                         string temp;
243                         temp = validParameter.validFile(parameters, "filter", false);                   if (temp == "not found") { temp = "F"; }
244                         filter = m->isTrue(temp);
245                         
246                         temp = validParameter.validFile(parameters, "processors", false);       if (temp == "not found"){       temp = m->getProcessors();      }
247                         m->setProcessors(temp);
248                         m->mothurConvert(temp, processors);
249                         
250                         temp = validParameter.validFile(parameters, "window", false);                   if (temp == "not found") { temp = "0"; }
251                         m->mothurConvert(temp, window);
252                         
253                         temp = validParameter.validFile(parameters, "numwanted", false);                if (temp == "not found") { temp = "20"; }
254                         m->mothurConvert(temp, numwanted);
255                         
256                         temp = validParameter.validFile(parameters, "save", false);                     if (temp == "not found"){       temp = "f";                             }
257                         save = m->isTrue(temp); 
258                         rdb->save = save; 
259                         if (save) { //clear out old references
260                                 rdb->clearMemory();     
261                         }
262                         
263                         //this has to go after save so that if the user sets save=t and provides no reference we abort
264                         templatefile = validParameter.validFile(parameters, "reference", true);
265                         if (templatefile == "not found") { 
266                                 //check for saved reference sequences
267                                 if (rdb->referenceSeqs.size() != 0) {
268                                         templatefile = "saved";
269                                 }else {
270                                         m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required."); 
271                                         m->mothurOutEndLine();
272                                         abort = true; 
273                                 }
274                         }else if (templatefile == "not open") { abort = true; } 
275                         else {  if (save) {     rdb->setSavedReference(templatefile);   }       }
276                         
277
278                 }
279         }
280         catch(exception& e) {
281                 m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
282                 exit(1);
283         }
284 }
285 //***************************************************************************************************************
286 int ChimeraCcodeCommand::execute(){
287         try{
288                 
289                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
290                 
291                 for (int s = 0; s < fastaFileNames.size(); s++) {
292                                 
293                         m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
294                 
295                         int start = time(NULL); 
296                         
297                         //set user options
298                         if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine();  }
299
300                         chimera = new Ccode(fastaFileNames[s], templatefile, filter, maskfile, window, numwanted, outputDir);   
301                         
302                         //is your template aligned?
303                         if (chimera->getUnaligned()) { m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); delete chimera; return 0; }
304                         templateSeqsLength = chimera->getLength();
305                         
306                         if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]);  }//if user entered a file with a path then preserve it
307                         string outputFileName, accnosFileName;
308                         if (maskfile != "") {
309                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + getOutputFileNameTag("chimera");
310                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + getOutputFileNameTag("accnos");
311                         }else {
312                                 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("chimera");
313                                 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]))  + getOutputFileNameTag("accnos");
314
315                         }
316
317                         string mapInfo = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("mapinfo");
318
319                         
320                         if (m->control_pressed) { delete chimera;  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } outputTypes.clear(); return 0;        }
321                         
322                 #ifdef USE_MPI
323                 
324                                 int pid, numSeqsPerProcessor; 
325                                 int tag = 2001;
326                                 vector<unsigned long long> MPIPos;
327                                                                 
328                                 MPI_Status status; 
329                                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
330                                 MPI_Comm_size(MPI_COMM_WORLD, &processors); 
331
332                                 MPI_File inMPI;
333                                 MPI_File outMPI;
334                                 MPI_File outMPIAccnos;
335                                 
336                                 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY; 
337                                 int inMode=MPI_MODE_RDONLY; 
338                                 
339                                 char outFilename[1024];
340                                 strcpy(outFilename, outputFileName.c_str());
341                                 
342                                 char outAccnosFilename[1024];
343                                 strcpy(outAccnosFilename, accnosFileName.c_str());
344                                 
345                                 char inFileName[1024];
346                                 strcpy(inFileName, fastaFileNames[s].c_str());
347
348                                 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI);  //comm, filename, mode, info, filepointer
349                                 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
350                                 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
351
352                                 if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        } outputTypes.clear();  delete chimera; return 0;  }
353                         
354                                 if (pid == 0) { //you are the root process 
355                                         string outTemp = "For full window mapping info refer to " + mapInfo + "\n";
356                                         
357                                         //print header
358                                         int length = outTemp.length();
359                                         char* buf2 = new char[length];
360                                         memcpy(buf2, outTemp.c_str(), length);
361
362                                         MPI_File_write_shared(outMPI, buf2, length, MPI_CHAR, &status);
363                                         delete buf2;
364
365                                         MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
366                                         
367                                         //send file positions to all processes
368                                         for(int i = 1; i < processors; i++) { 
369                                                 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
370                                                 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
371                                         }
372                                         
373                                         //figure out how many sequences you have to align
374                                         numSeqsPerProcessor = numSeqs / processors;
375                                         int startIndex =  pid * numSeqsPerProcessor;
376                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
377                                         
378                                 
379                                         //align your part
380                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
381                                         
382                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  m->mothurRemove(outputFileName);  m->mothurRemove(accnosFileName);  for (int j = 0; j < outputNames.size(); j++) {       m->mothurRemove(outputNames[j]);        } outputTypes.clear();  delete chimera; return 0;  }
383
384                                 }else{ //you are a child process
385                                         MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
386                                         MPIPos.resize(numSeqs+1);
387                                         MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
388                                         
389                                         //figure out how many sequences you have to align
390                                         numSeqsPerProcessor = numSeqs / processors;
391                                         int startIndex =  pid * numSeqsPerProcessor;
392                                         if(pid == (processors - 1)){    numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor;      }
393                                         
394                                         
395                                         //align your part
396                                         driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
397                                         
398                                         if (m->control_pressed) {  MPI_File_close(&inMPI);  MPI_File_close(&outMPI);   MPI_File_close(&outMPIAccnos);  for (int j = 0; j < outputNames.size(); j++) {   m->mothurRemove(outputNames[j]);        }  outputTypes.clear(); delete chimera; return 0;  }
399                                 }
400                                 
401                                 //close files 
402                                 MPI_File_close(&inMPI);
403                                 MPI_File_close(&outMPI);
404                                 MPI_File_close(&outMPIAccnos);
405                                 
406                                 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
407                                         
408                 #else
409                         ofstream outHeader;
410                         string tempHeader = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + "ccode.chimeras.tempHeader";
411                         m->openOutputFile(tempHeader, outHeader);
412                         
413                         outHeader << "For full window mapping info refer to " << mapInfo << endl << endl;
414
415                         outHeader.close();
416                         
417                         
418                         
419                         //break up file
420                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
421                                 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
422                         
423                                 for (int i = 0; i < (positions.size()-1); i++) {
424                                         lines.push_back(new linePair(positions[i], positions[(i+1)]));
425                                 }       
426                         
427                                 if(processors == 1){
428                                                                                 
429                                         numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
430                                         
431                                         if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
432                                         
433                                 }else{
434                                         processIDS.resize(0);
435                                         
436                                         numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName); 
437                                 
438                                         rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
439                                         rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
440                                                 
441                                         //append output files
442                                         for(int i=1;i<processors;i++){
443                                                 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
444                                                 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
445                                         }
446                                         
447                                         //append output files
448                                         for(int i=1;i<processors;i++){
449                                                 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
450                                                 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
451                                         }
452                                         
453                                         if (m->control_pressed) { 
454                                                 m->mothurRemove(outputFileName); 
455                                                 m->mothurRemove(accnosFileName);
456                                                 for (int j = 0; j < outputNames.size(); j++) {  m->mothurRemove(outputNames[j]);        } outputTypes.clear();
457                                                 for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
458                                                 delete chimera;
459                                                 return 0;
460                                         }
461
462                                 }
463
464                         #else
465                                 lines.push_back(new linePair(0, 1000));
466                                 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
467                                 
468                                 if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]);        } for (int i = 0; i < lines.size(); i++) {  delete lines[i];  } outputTypes.clear();  lines.clear(); delete chimera; return 0; }
469                                 
470                         #endif
471         
472                         m->appendFiles(outputFileName, tempHeader);
473                 
474                         m->mothurRemove(outputFileName);
475                         rename(tempHeader.c_str(), outputFileName.c_str());
476                 #endif
477                 
478                         delete chimera;
479                         
480                         outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
481                         outputNames.push_back(mapInfo); outputTypes["mapinfo"].push_back(mapInfo);
482                         outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
483                          
484                         for (int i = 0; i < lines.size(); i++) {  delete lines[i];  }  lines.clear();
485                         
486                         m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
487                 }
488                 
489                 
490                 //set accnos file as new current accnosfile
491                 string current = "";
492                 itTypes = outputTypes.find("accnos");
493                 if (itTypes != outputTypes.end()) {
494                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
495                 }
496                 
497                 m->mothurOutEndLine();
498                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
499                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }       
500                 m->mothurOutEndLine();
501                 
502                 return 0;
503                 
504         }
505         catch(exception& e) {
506                 m->errorOut(e, "ChimeraCcodeCommand", "execute");
507                 exit(1);
508         }
509 }
510 //**********************************************************************************************************************
511
512 int ChimeraCcodeCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
513         try {
514                 ofstream out;
515                 m->openOutputFile(outputFName, out);
516                 
517                 ofstream out2;
518                 m->openOutputFile(accnos, out2);
519                 
520                 ifstream inFASTA;
521                 m->openInputFile(filename, inFASTA);
522
523                 inFASTA.seekg(filePos->start);
524
525                 bool done = false;
526                 int count = 0;
527         
528                 while (!done) {
529                 
530                         if (m->control_pressed) {       return 1;       }
531                 
532                         Sequence* candidateSeq = new Sequence(inFASTA);  m->gobble(inFASTA);
533                                 
534                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
535                                 
536                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
537                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
538                                 }else{
539                                         //find chimeras
540                                         chimera->getChimeras(candidateSeq);
541                                         
542                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
543                 
544                                         //print results
545                                         chimera->print(out, out2);
546                                 }
547                                 count++;
548                         }
549                         delete candidateSeq;
550                         
551                         #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
552                                 unsigned long long pos = inFASTA.tellg();
553                                 if ((pos == -1) || (pos >= filePos->end)) { break; }
554                         #else
555                                 if (inFASTA.eof()) { break; }
556                         #endif
557                         
558                         //report progress
559                         if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
560                 }
561                 //report progress
562                 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine();         }
563                 
564                 out.close();
565                 out2.close();
566                 inFASTA.close();
567                                 
568                 return count;
569         }
570         catch(exception& e) {
571                 m->errorOut(e, "ChimeraCcodeCommand", "driver");
572                 exit(1);
573         }
574 }
575 //**********************************************************************************************************************
576 #ifdef USE_MPI
577 int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
578         try {
579                                 
580                 MPI_Status status; 
581                 int pid;
582                 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
583                 
584                 for(int i=0;i<num;i++){
585                 
586                         if (m->control_pressed) { return 0; }
587                         
588                         //read next sequence
589                         int length = MPIPos[start+i+1] - MPIPos[start+i];
590         
591                         char* buf4 = new char[length];
592                                 
593                         MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
594                         
595                         string tempBuf = buf4;
596                         if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length);  }
597                         istringstream iss (tempBuf,istringstream::in);
598                         delete buf4;
599
600                         Sequence* candidateSeq = new Sequence(iss);  m->gobble(iss);
601                                 
602                         if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
603                                 
604                                 if (candidateSeq->getAligned().length() != templateSeqsLength) {  
605                                         m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
606                                 }else{
607                                         //find chimeras
608                                         chimera->getChimeras(candidateSeq);
609                                         
610                                         if (m->control_pressed) {       delete candidateSeq; return 1;  }
611                 
612                                         //print results
613                                         chimera->print(outMPI, outAccMPI);
614                                 }
615                         }
616                         delete candidateSeq;
617                         
618                         //report progress
619                         if((i+1) % 100 == 0){  cout << "Processing sequence: " << (i+1) << endl;        m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n");          }
620                 }
621                 //report progress
622                 if(num % 100 != 0){             cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n");  }
623                 
624                                 
625                 return 0;
626         }
627         catch(exception& e) {
628                 m->errorOut(e, "ChimeraCcodeCommand", "driverMPI");
629                 exit(1);
630         }
631 }
632 #endif
633
634 /**************************************************************************************************/
635
636 int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, string accnos) {
637         try {
638 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
639                 int process = 0;
640                 int num = 0;
641                 
642                 //loop through and create all the processes you want
643                 while (process != processors) {
644                         int pid = fork();
645                         
646                         if (pid > 0) {
647                                 processIDS.push_back(pid);  //create map from line number to pid so you can append files in correct order later
648                                 process++;
649                         }else if (pid == 0){
650                                 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
651                                 
652                                 //pass numSeqs to parent
653                                 ofstream out;
654                                 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
655                                 m->openOutputFile(tempFile, out);
656                                 out << num << endl;
657                                 out.close();
658
659                                 exit(0);
660                         }else { 
661                                 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); 
662                                 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
663                                 exit(0);
664                         }
665                 }
666                 
667                 //force parent to wait until all the processes are done
668                 for (int i=0;i<processors;i++) { 
669                         int temp = processIDS[i];
670                         wait(&temp);
671                 }
672                 
673                 for (int i = 0; i < processIDS.size(); i++) {
674                         ifstream in;
675                         string tempFile =  outputFileName + toString(processIDS[i]) + ".num.temp";
676                         m->openInputFile(tempFile, in);
677                         if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
678                         in.close(); m->mothurRemove(tempFile);
679                 }
680                 
681                 return num;
682 #endif          
683         }
684         catch(exception& e) {
685                 m->errorOut(e, "ChimeraCcodeCommand", "createProcesses");
686                 exit(1);
687         }
688 }
689 //**********************************************************************************************************************
690