2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){
16 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
49 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
51 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
53 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57 helpString += "The chimera.pintail command should be in the following format: \n";
58 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
64 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){
71 abort = true; calledHelp = true;
73 vector<string> tempOutNames;
74 outputTypes["chimera"] = tempOutNames;
75 outputTypes["accnos"] = tempOutNames;
78 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
89 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
92 vector<string> myArray = setParameters();
94 OptionParser parser(option);
95 map<string,string> parameters = parser.getParameters();
97 ValidParameters validParameter("chimera.pintail");
98 map<string,string>::iterator it;
100 //check to make sure all parameters are valid for command
101 for (it = parameters.begin(); it != parameters.end(); it++) {
102 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
105 vector<string> tempOutNames;
106 outputTypes["chimera"] = tempOutNames;
107 outputTypes["accnos"] = tempOutNames;
109 //if the user changes the input directory command factory will send this info to us in the output parameter
110 inputDir = validParameter.validFile(parameters, "inputdir", false);
111 if (inputDir == "not found"){ inputDir = ""; }
114 it = parameters.find("reference");
115 //user has given a template file
116 if(it != parameters.end()){
117 path = m->hasPath(it->second);
118 //if the user has not given a path then, add inputdir. else leave path alone.
119 if (path == "") { parameters["reference"] = inputDir + it->second; }
122 it = parameters.find("conservation");
123 //user has given a template file
124 if(it != parameters.end()){
125 path = m->hasPath(it->second);
126 //if the user has not given a path then, add inputdir. else leave path alone.
127 if (path == "") { parameters["conservation"] = inputDir + it->second; }
130 it = parameters.find("quantile");
131 //user has given a template file
132 if(it != parameters.end()){
133 path = m->hasPath(it->second);
134 //if the user has not given a path then, add inputdir. else leave path alone.
135 if (path == "") { parameters["quantile"] = inputDir + it->second; }
140 //check for required parameters
141 fastafile = validParameter.validFile(parameters, "fasta", false);
142 if (fastafile == "not found") {
143 //if there is a current fasta file, use it
144 string filename = m->getFastaFile();
145 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
146 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
148 m->splitAtDash(fastafile, fastaFileNames);
150 //go through files and make sure they are good, if not, then disregard them
151 for (int i = 0; i < fastaFileNames.size(); i++) {
154 if (fastaFileNames[i] == "current") {
155 fastaFileNames[i] = m->getFastaFile();
156 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
158 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
159 //erase from file list
160 fastaFileNames.erase(fastaFileNames.begin()+i);
167 if (inputDir != "") {
168 string path = m->hasPath(fastaFileNames[i]);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
176 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
178 //if you can't open it, try default location
179 if (ableToOpen == 1) {
180 if (m->getDefaultPath() != "") { //default path is set
181 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
182 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
184 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
186 fastaFileNames[i] = tryPath;
190 if (ableToOpen == 1) {
191 if (m->getOutputDir() != "") { //default path is set
192 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
193 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
195 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
197 fastaFileNames[i] = tryPath;
203 if (ableToOpen == 1) {
204 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
205 //erase from file list
206 fastaFileNames.erase(fastaFileNames.begin()+i);
212 //make sure there is at least one valid file left
213 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
217 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
218 filter = m->isTrue(temp);
220 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
221 m->setProcessors(temp);
222 convert(temp, processors);
224 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
225 convert(temp, window);
227 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
228 convert(temp, increment);
230 maskfile = validParameter.validFile(parameters, "mask", false);
231 if (maskfile == "not found") { maskfile = ""; }
232 else if (maskfile != "default") {
233 if (inputDir != "") {
234 string path = m->hasPath(maskfile);
235 //if the user has not given a path then, add inputdir. else leave path alone.
236 if (path == "") { maskfile = inputDir + maskfile; }
240 int ableToOpen = m->openInputFile(maskfile, in, "no error");
241 if (ableToOpen == 1) {
242 if (m->getDefaultPath() != "") { //default path is set
243 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
244 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
246 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
252 if (ableToOpen == 1) {
253 if (m->getOutputDir() != "") { //default path is set
254 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
255 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
257 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
265 if (ableToOpen == 1) {
266 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
272 //if the user changes the output directory command factory will send this info to us in the output parameter
273 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
275 templatefile = validParameter.validFile(parameters, "reference", true);
276 if (templatefile == "not open") { abort = true; }
277 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
279 consfile = validParameter.validFile(parameters, "conservation", true);
280 if (consfile == "not open") { abort = true; }
281 else if (consfile == "not found") {
284 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
285 ifstream FileTest(tempConsFile.c_str());
287 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
289 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
292 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
293 ifstream FileTest2(tempConsFile.c_str());
295 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
297 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
303 quanfile = validParameter.validFile(parameters, "quantile", true);
304 if (quanfile == "not open") { abort = true; }
305 else if (quanfile == "not found") { quanfile = ""; }
308 catch(exception& e) {
309 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
313 //***************************************************************************************************************
315 int ChimeraPintailCommand::execute(){
318 if (abort == true) { if (calledHelp) { return 0; } return 2; }
320 for (int s = 0; s < fastaFileNames.size(); s++) {
322 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
324 int start = time(NULL);
327 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
329 //check for quantile to save the time
330 string tempQuan = "";
331 if ((!filter) && (maskfile == "")) {
332 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
333 }else if ((!filter) && (maskfile != "")) {
334 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
335 }else if ((filter) && (maskfile != "")) {
336 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
337 }else if ((filter) && (maskfile == "")) {
338 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
341 ifstream FileTest(tempQuan.c_str());
343 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
345 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
348 string tryPath = m->getDefaultPath();
349 string tempQuan = "";
350 if ((!filter) && (maskfile == "")) {
351 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
352 }else if ((!filter) && (maskfile != "")) {
353 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
354 }else if ((filter) && (maskfile != "")) {
355 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
356 }else if ((filter) && (maskfile == "")) {
357 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
360 ifstream FileTest2(tempQuan.c_str());
362 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
364 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
369 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
371 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
372 string outputFileName, accnosFileName;
373 if (maskfile != "") {
374 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
375 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
377 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
378 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
381 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
383 if (chimera->getUnaligned()) {
384 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
388 templateSeqsLength = chimera->getLength();
391 int pid, numSeqsPerProcessor;
393 vector<unsigned long int> MPIPos;
396 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
397 MPI_Comm_size(MPI_COMM_WORLD, &processors);
401 MPI_File outMPIAccnos;
403 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
404 int inMode=MPI_MODE_RDONLY;
406 char outFilename[1024];
407 strcpy(outFilename, outputFileName.c_str());
409 char outAccnosFilename[1024];
410 strcpy(outAccnosFilename, accnosFileName.c_str());
412 char inFileName[1024];
413 strcpy(inFileName, fastaFileNames[s].c_str());
415 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
416 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
417 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
419 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
421 if (pid == 0) { //you are the root process
423 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
425 //send file positions to all processes
426 for(int i = 1; i < processors; i++) {
427 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
428 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
431 //figure out how many sequences you have to align
432 numSeqsPerProcessor = numSeqs / processors;
433 int startIndex = pid * numSeqsPerProcessor;
434 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
437 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
439 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
441 }else{ //you are a child process
442 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
443 MPIPos.resize(numSeqs+1);
444 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
446 //figure out how many sequences you have to align
447 numSeqsPerProcessor = numSeqs / processors;
448 int startIndex = pid * numSeqsPerProcessor;
449 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
452 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
454 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
458 MPI_File_close(&inMPI);
459 MPI_File_close(&outMPI);
460 MPI_File_close(&outMPIAccnos);
461 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
463 vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
465 for (int i = 0; i < (positions.size()-1); i++) {
466 lines.push_back(new linePair(positions[i], positions[(i+1)]));
470 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
473 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
475 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
478 processIDS.resize(0);
480 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
482 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
483 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
485 //append output files
486 for(int i=1;i<processors;i++){
487 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
488 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
491 //append output files
492 for(int i=1;i<processors;i++){
493 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
494 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
497 if (m->control_pressed) {
498 remove(outputFileName.c_str());
499 remove(accnosFileName.c_str());
500 for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
501 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
508 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
510 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
516 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
518 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
519 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
521 m->mothurOutEndLine();
522 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
525 //set accnos file as new current accnosfile
527 itTypes = outputTypes.find("accnos");
528 if (itTypes != outputTypes.end()) {
529 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
532 m->mothurOutEndLine();
533 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
534 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
535 m->mothurOutEndLine();
540 catch(exception& e) {
541 m->errorOut(e, "ChimeraPintailCommand", "execute");
545 //**********************************************************************************************************************
547 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
550 m->openOutputFile(outputFName, out);
553 m->openOutputFile(accnos, out2);
556 m->openInputFile(filename, inFASTA);
558 inFASTA.seekg(filePos->start);
565 if (m->control_pressed) { return 1; }
567 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
569 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
571 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
572 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
575 chimera->getChimeras(candidateSeq);
577 if (m->control_pressed) { delete candidateSeq; return 1; }
580 chimera->print(out, out2);
586 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
587 unsigned long int pos = inFASTA.tellg();
588 if ((pos == -1) || (pos >= filePos->end)) { break; }
590 if (inFASTA.eof()) { break; }
594 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
597 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
605 catch(exception& e) {
606 m->errorOut(e, "ChimeraPintailCommand", "driver");
610 //**********************************************************************************************************************
612 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
617 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
619 for(int i=0;i<num;i++){
621 if (m->control_pressed) { return 1; }
624 int length = MPIPos[start+i+1] - MPIPos[start+i];
626 char* buf4 = new char[length];
627 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
629 string tempBuf = buf4;
630 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
631 istringstream iss (tempBuf,istringstream::in);
634 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
636 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
638 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
639 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
642 chimera->getChimeras(candidateSeq);
644 if (m->control_pressed) { delete candidateSeq; return 1; }
647 chimera->print(outMPI, outAccMPI);
653 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
656 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
661 catch(exception& e) {
662 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
668 /**************************************************************************************************/
670 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
672 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
676 //loop through and create all the processes you want
677 while (process != processors) {
681 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
684 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
686 //pass numSeqs to parent
688 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
689 m->openOutputFile(tempFile, out);
695 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
696 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
701 //force parent to wait until all the processes are done
702 for (int i=0;i<processors;i++) {
703 int temp = processIDS[i];
707 for (int i = 0; i < processIDS.size(); i++) {
709 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
710 m->openInputFile(tempFile, in);
711 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
712 in.close(); remove(tempFile.c_str());
718 catch(exception& e) {
719 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
724 /**************************************************************************************************/