2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){
16 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
49 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
51 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
53 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57 helpString += "The chimera.pintail command should be in the following format: \n";
58 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
64 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){
71 abort = true; calledHelp = true;
73 vector<string> tempOutNames;
74 outputTypes["chimera"] = tempOutNames;
75 outputTypes["accnos"] = tempOutNames;
78 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
91 vector<string> myArray = setParameters();
93 OptionParser parser(option);
94 map<string,string> parameters = parser.getParameters();
96 ValidParameters validParameter("chimera.pintail");
97 map<string,string>::iterator it;
99 //check to make sure all parameters are valid for command
100 for (it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 vector<string> tempOutNames;
105 outputTypes["chimera"] = tempOutNames;
106 outputTypes["accnos"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("reference");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["reference"] = inputDir + it->second; }
121 it = parameters.find("conservation");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["conservation"] = inputDir + it->second; }
129 it = parameters.find("quantile");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["quantile"] = inputDir + it->second; }
139 //check for required parameters
140 fastafile = validParameter.validFile(parameters, "fasta", false);
141 if (fastafile == "not found") {
142 //if there is a current fasta file, use it
143 string filename = m->getFastaFile();
144 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
147 m->splitAtDash(fastafile, fastaFileNames);
149 //go through files and make sure they are good, if not, then disregard them
150 for (int i = 0; i < fastaFileNames.size(); i++) {
153 if (fastaFileNames[i] == "current") {
154 fastaFileNames[i] = m->getFastaFile();
155 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
157 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
158 //erase from file list
159 fastaFileNames.erase(fastaFileNames.begin()+i);
166 if (inputDir != "") {
167 string path = m->hasPath(fastaFileNames[i]);
168 //if the user has not given a path then, add inputdir. else leave path alone.
169 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
175 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
177 //if you can't open it, try default location
178 if (ableToOpen == 1) {
179 if (m->getDefaultPath() != "") { //default path is set
180 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
181 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
183 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
185 fastaFileNames[i] = tryPath;
189 if (ableToOpen == 1) {
190 if (m->getOutputDir() != "") { //default path is set
191 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
192 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
194 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
196 fastaFileNames[i] = tryPath;
202 if (ableToOpen == 1) {
203 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
204 //erase from file list
205 fastaFileNames.erase(fastaFileNames.begin()+i);
211 //make sure there is at least one valid file left
212 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
216 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
217 filter = m->isTrue(temp);
219 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
220 m->setProcessors(temp);
221 convert(temp, processors);
223 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
224 convert(temp, window);
226 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
227 convert(temp, increment);
229 maskfile = validParameter.validFile(parameters, "mask", false);
230 if (maskfile == "not found") { maskfile = ""; }
231 else if (maskfile != "default") {
232 if (inputDir != "") {
233 string path = m->hasPath(maskfile);
234 //if the user has not given a path then, add inputdir. else leave path alone.
235 if (path == "") { maskfile = inputDir + maskfile; }
239 int ableToOpen = m->openInputFile(maskfile, in, "no error");
240 if (ableToOpen == 1) {
241 if (m->getDefaultPath() != "") { //default path is set
242 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
243 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
245 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
251 if (ableToOpen == 1) {
252 if (m->getOutputDir() != "") { //default path is set
253 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
254 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
256 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
264 if (ableToOpen == 1) {
265 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
271 //if the user changes the output directory command factory will send this info to us in the output parameter
272 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
274 templatefile = validParameter.validFile(parameters, "reference", true);
275 if (templatefile == "not open") { abort = true; }
276 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
278 consfile = validParameter.validFile(parameters, "conservation", true);
279 if (consfile == "not open") { abort = true; }
280 else if (consfile == "not found") {
283 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
284 ifstream FileTest(tempConsFile.c_str());
286 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
288 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
291 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
292 ifstream FileTest2(tempConsFile.c_str());
294 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
296 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
302 quanfile = validParameter.validFile(parameters, "quantile", true);
303 if (quanfile == "not open") { abort = true; }
304 else if (quanfile == "not found") { quanfile = ""; }
307 catch(exception& e) {
308 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
312 //***************************************************************************************************************
314 int ChimeraPintailCommand::execute(){
317 if (abort == true) { if (calledHelp) { return 0; } return 2; }
319 for (int s = 0; s < fastaFileNames.size(); s++) {
321 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
323 int start = time(NULL);
326 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
328 //check for quantile to save the time
329 string tempQuan = "";
330 if ((!filter) && (maskfile == "")) {
331 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
332 }else if ((!filter) && (maskfile != "")) {
333 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
334 }else if ((filter) && (maskfile != "")) {
335 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
336 }else if ((filter) && (maskfile == "")) {
337 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
340 ifstream FileTest(tempQuan.c_str());
342 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
344 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
347 string tryPath = m->getDefaultPath();
348 string tempQuan = "";
349 if ((!filter) && (maskfile == "")) {
350 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
351 }else if ((!filter) && (maskfile != "")) {
352 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
353 }else if ((filter) && (maskfile != "")) {
354 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
355 }else if ((filter) && (maskfile == "")) {
356 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
359 ifstream FileTest2(tempQuan.c_str());
361 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
363 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
368 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
370 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
371 string outputFileName, accnosFileName;
372 if (maskfile != "") {
373 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
374 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
376 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
377 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
380 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
382 if (chimera->getUnaligned()) {
383 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
387 templateSeqsLength = chimera->getLength();
390 int pid, numSeqsPerProcessor;
392 vector<unsigned long int> MPIPos;
395 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
396 MPI_Comm_size(MPI_COMM_WORLD, &processors);
400 MPI_File outMPIAccnos;
402 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
403 int inMode=MPI_MODE_RDONLY;
405 char outFilename[1024];
406 strcpy(outFilename, outputFileName.c_str());
408 char outAccnosFilename[1024];
409 strcpy(outAccnosFilename, accnosFileName.c_str());
411 char inFileName[1024];
412 strcpy(inFileName, fastaFileNames[s].c_str());
414 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
415 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
416 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
418 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
420 if (pid == 0) { //you are the root process
422 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
424 //send file positions to all processes
425 for(int i = 1; i < processors; i++) {
426 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
427 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
430 //figure out how many sequences you have to align
431 numSeqsPerProcessor = numSeqs / processors;
432 int startIndex = pid * numSeqsPerProcessor;
433 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
436 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
438 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
440 }else{ //you are a child process
441 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
442 MPIPos.resize(numSeqs+1);
443 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
445 //figure out how many sequences you have to align
446 numSeqsPerProcessor = numSeqs / processors;
447 int startIndex = pid * numSeqsPerProcessor;
448 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
451 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
453 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
457 MPI_File_close(&inMPI);
458 MPI_File_close(&outMPI);
459 MPI_File_close(&outMPIAccnos);
460 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
462 vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
464 for (int i = 0; i < (positions.size()-1); i++) {
465 lines.push_back(new linePair(positions[i], positions[(i+1)]));
469 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
472 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
474 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
477 processIDS.resize(0);
479 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
481 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
482 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
484 //append output files
485 for(int i=1;i<processors;i++){
486 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
487 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
490 //append output files
491 for(int i=1;i<processors;i++){
492 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
493 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
496 if (m->control_pressed) {
497 remove(outputFileName.c_str());
498 remove(accnosFileName.c_str());
499 for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
500 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
507 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
509 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
515 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
517 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
518 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
520 m->mothurOutEndLine();
521 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
524 //set accnos file as new current accnosfile
526 itTypes = outputTypes.find("accnos");
527 if (itTypes != outputTypes.end()) {
528 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
531 m->mothurOutEndLine();
532 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
533 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
534 m->mothurOutEndLine();
539 catch(exception& e) {
540 m->errorOut(e, "ChimeraPintailCommand", "execute");
544 //**********************************************************************************************************************
546 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
549 m->openOutputFile(outputFName, out);
552 m->openOutputFile(accnos, out2);
555 m->openInputFile(filename, inFASTA);
557 inFASTA.seekg(filePos->start);
564 if (m->control_pressed) { return 1; }
566 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
568 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
570 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
571 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
574 chimera->getChimeras(candidateSeq);
576 if (m->control_pressed) { delete candidateSeq; return 1; }
579 chimera->print(out, out2);
585 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
586 unsigned long int pos = inFASTA.tellg();
587 if ((pos == -1) || (pos >= filePos->end)) { break; }
589 if (inFASTA.eof()) { break; }
593 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
596 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
604 catch(exception& e) {
605 m->errorOut(e, "ChimeraPintailCommand", "driver");
609 //**********************************************************************************************************************
611 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
616 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
618 for(int i=0;i<num;i++){
620 if (m->control_pressed) { return 1; }
623 int length = MPIPos[start+i+1] - MPIPos[start+i];
625 char* buf4 = new char[length];
626 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
628 string tempBuf = buf4;
629 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
630 istringstream iss (tempBuf,istringstream::in);
633 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
635 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
637 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
638 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
641 chimera->getChimeras(candidateSeq);
643 if (m->control_pressed) { delete candidateSeq; return 1; }
646 chimera->print(outMPI, outAccMPI);
652 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
655 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
660 catch(exception& e) {
661 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
667 /**************************************************************************************************/
669 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
671 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
675 //loop through and create all the processes you want
676 while (process != processors) {
680 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
683 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
685 //pass numSeqs to parent
687 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
688 m->openOutputFile(tempFile, out);
694 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
695 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
700 //force parent to wait until all the processes are done
701 for (int i=0;i<processors;i++) {
702 int temp = processIDS[i];
706 for (int i = 0; i < processIDS.size(); i++) {
708 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
709 m->openInputFile(tempFile, in);
710 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
711 in.close(); remove(tempFile.c_str());
717 catch(exception& e) {
718 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
723 /**************************************************************************************************/