2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){
16 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
49 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
51 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
53 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57 helpString += "The chimera.pintail command should be in the following format: \n";
58 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
64 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){
71 abort = true; calledHelp = true;
73 vector<string> tempOutNames;
74 outputTypes["chimera"] = tempOutNames;
75 outputTypes["accnos"] = tempOutNames;
78 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
97 ValidParameters validParameter("chimera.pintail");
98 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("reference");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["reference"] = inputDir + it->second; }
122 it = parameters.find("conservation");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["conservation"] = inputDir + it->second; }
130 it = parameters.find("quantile");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["quantile"] = inputDir + it->second; }
140 //check for required parameters
141 fastafile = validParameter.validFile(parameters, "fasta", false);
142 if (fastafile == "not found") {
143 //if there is a current fasta file, use it
144 string filename = m->getFastaFile();
145 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
146 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
148 m->splitAtDash(fastafile, fastaFileNames);
150 //go through files and make sure they are good, if not, then disregard them
151 for (int i = 0; i < fastaFileNames.size(); i++) {
154 if (fastaFileNames[i] == "current") {
155 fastaFileNames[i] = m->getFastaFile();
156 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
158 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
159 //erase from file list
160 fastaFileNames.erase(fastaFileNames.begin()+i);
167 if (inputDir != "") {
168 string path = m->hasPath(fastaFileNames[i]);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
176 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
178 //if you can't open it, try default location
179 if (ableToOpen == 1) {
180 if (m->getDefaultPath() != "") { //default path is set
181 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
182 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
184 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
186 fastaFileNames[i] = tryPath;
190 if (ableToOpen == 1) {
191 if (m->getOutputDir() != "") { //default path is set
192 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
193 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
195 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
197 fastaFileNames[i] = tryPath;
203 if (ableToOpen == 1) {
204 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
205 //erase from file list
206 fastaFileNames.erase(fastaFileNames.begin()+i);
209 m->setFastaFile(fastaFileNames[i]);
214 //make sure there is at least one valid file left
215 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
219 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
220 filter = m->isTrue(temp);
222 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
223 m->setProcessors(temp);
224 convert(temp, processors);
226 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
227 convert(temp, window);
229 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
230 convert(temp, increment);
232 maskfile = validParameter.validFile(parameters, "mask", false);
233 if (maskfile == "not found") { maskfile = ""; }
234 else if (maskfile != "default") {
235 if (inputDir != "") {
236 string path = m->hasPath(maskfile);
237 //if the user has not given a path then, add inputdir. else leave path alone.
238 if (path == "") { maskfile = inputDir + maskfile; }
242 int ableToOpen = m->openInputFile(maskfile, in, "no error");
243 if (ableToOpen == 1) {
244 if (m->getDefaultPath() != "") { //default path is set
245 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
246 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
248 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
254 if (ableToOpen == 1) {
255 if (m->getOutputDir() != "") { //default path is set
256 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
257 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
259 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
267 if (ableToOpen == 1) {
268 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
274 //if the user changes the output directory command factory will send this info to us in the output parameter
275 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
277 templatefile = validParameter.validFile(parameters, "reference", true);
278 if (templatefile == "not open") { abort = true; }
279 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
281 consfile = validParameter.validFile(parameters, "conservation", true);
282 if (consfile == "not open") { abort = true; }
283 else if (consfile == "not found") {
286 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
287 ifstream FileTest(tempConsFile.c_str());
289 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
291 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
294 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
295 ifstream FileTest2(tempConsFile.c_str());
297 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
299 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
305 quanfile = validParameter.validFile(parameters, "quantile", true);
306 if (quanfile == "not open") { abort = true; }
307 else if (quanfile == "not found") { quanfile = ""; }
310 catch(exception& e) {
311 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
315 //***************************************************************************************************************
317 int ChimeraPintailCommand::execute(){
320 if (abort == true) { if (calledHelp) { return 0; } return 2; }
322 for (int s = 0; s < fastaFileNames.size(); s++) {
324 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
326 int start = time(NULL);
329 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
331 //check for quantile to save the time
332 string tempQuan = "";
333 if ((!filter) && (maskfile == "")) {
334 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
335 }else if ((!filter) && (maskfile != "")) {
336 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
337 }else if ((filter) && (maskfile != "")) {
338 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
339 }else if ((filter) && (maskfile == "")) {
340 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
343 ifstream FileTest(tempQuan.c_str());
345 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
347 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
350 string tryPath = m->getDefaultPath();
351 string tempQuan = "";
352 if ((!filter) && (maskfile == "")) {
353 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
354 }else if ((!filter) && (maskfile != "")) {
355 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
356 }else if ((filter) && (maskfile != "")) {
357 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
358 }else if ((filter) && (maskfile == "")) {
359 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
362 ifstream FileTest2(tempQuan.c_str());
364 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
366 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
371 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
373 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
374 string outputFileName, accnosFileName;
375 if (maskfile != "") {
376 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
377 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
379 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
380 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
383 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
385 if (chimera->getUnaligned()) {
386 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
390 templateSeqsLength = chimera->getLength();
393 int pid, numSeqsPerProcessor;
395 vector<unsigned long int> MPIPos;
398 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
399 MPI_Comm_size(MPI_COMM_WORLD, &processors);
403 MPI_File outMPIAccnos;
405 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
406 int inMode=MPI_MODE_RDONLY;
408 char outFilename[1024];
409 strcpy(outFilename, outputFileName.c_str());
411 char outAccnosFilename[1024];
412 strcpy(outAccnosFilename, accnosFileName.c_str());
414 char inFileName[1024];
415 strcpy(inFileName, fastaFileNames[s].c_str());
417 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
418 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
419 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
421 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
423 if (pid == 0) { //you are the root process
425 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
427 //send file positions to all processes
428 for(int i = 1; i < processors; i++) {
429 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
430 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
433 //figure out how many sequences you have to align
434 numSeqsPerProcessor = numSeqs / processors;
435 int startIndex = pid * numSeqsPerProcessor;
436 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
439 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
441 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
443 }else{ //you are a child process
444 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
445 MPIPos.resize(numSeqs+1);
446 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
448 //figure out how many sequences you have to align
449 numSeqsPerProcessor = numSeqs / processors;
450 int startIndex = pid * numSeqsPerProcessor;
451 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
454 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
456 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
460 MPI_File_close(&inMPI);
461 MPI_File_close(&outMPI);
462 MPI_File_close(&outMPIAccnos);
463 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
465 vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
467 for (int i = 0; i < (positions.size()-1); i++) {
468 lines.push_back(new linePair(positions[i], positions[(i+1)]));
472 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
475 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
477 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
480 processIDS.resize(0);
482 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
484 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
485 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
487 //append output files
488 for(int i=1;i<processors;i++){
489 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
490 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
493 //append output files
494 for(int i=1;i<processors;i++){
495 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
496 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
499 if (m->control_pressed) {
500 remove(outputFileName.c_str());
501 remove(accnosFileName.c_str());
502 for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
503 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
510 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
512 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
518 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
520 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
521 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
523 m->mothurOutEndLine();
524 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
527 //set accnos file as new current accnosfile
529 itTypes = outputTypes.find("accnos");
530 if (itTypes != outputTypes.end()) {
531 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
534 m->mothurOutEndLine();
535 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
536 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
537 m->mothurOutEndLine();
542 catch(exception& e) {
543 m->errorOut(e, "ChimeraPintailCommand", "execute");
547 //**********************************************************************************************************************
549 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
552 m->openOutputFile(outputFName, out);
555 m->openOutputFile(accnos, out2);
558 m->openInputFile(filename, inFASTA);
560 inFASTA.seekg(filePos->start);
567 if (m->control_pressed) { return 1; }
569 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
571 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
573 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
574 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
577 chimera->getChimeras(candidateSeq);
579 if (m->control_pressed) { delete candidateSeq; return 1; }
582 chimera->print(out, out2);
588 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
589 unsigned long int pos = inFASTA.tellg();
590 if ((pos == -1) || (pos >= filePos->end)) { break; }
592 if (inFASTA.eof()) { break; }
596 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
599 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
607 catch(exception& e) {
608 m->errorOut(e, "ChimeraPintailCommand", "driver");
612 //**********************************************************************************************************************
614 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
619 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
621 for(int i=0;i<num;i++){
623 if (m->control_pressed) { return 1; }
626 int length = MPIPos[start+i+1] - MPIPos[start+i];
628 char* buf4 = new char[length];
629 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
631 string tempBuf = buf4;
632 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
633 istringstream iss (tempBuf,istringstream::in);
636 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
638 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
640 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
641 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
644 chimera->getChimeras(candidateSeq);
646 if (m->control_pressed) { delete candidateSeq; return 1; }
649 chimera->print(outMPI, outAccMPI);
655 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
658 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
663 catch(exception& e) {
664 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
670 /**************************************************************************************************/
672 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
674 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
678 //loop through and create all the processes you want
679 while (process != processors) {
683 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
686 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
688 //pass numSeqs to parent
690 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
691 m->openOutputFile(tempFile, out);
697 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
698 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
703 //force parent to wait until all the processes are done
704 for (int i=0;i<processors;i++) {
705 int temp = processIDS[i];
709 for (int i = 0; i < processIDS.size(); i++) {
711 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
712 m->openInputFile(tempFile, in);
713 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
714 in.close(); remove(tempFile.c_str());
720 catch(exception& e) {
721 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
726 /**************************************************************************************************/