2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
13 //**********************************************************************************************************************
14 vector<string> ChimeraPintailCommand::setParameters(){
16 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
17 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
18 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
19 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
20 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
21 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
22 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
23 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
24 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
37 //**********************************************************************************************************************
38 string ChimeraPintailCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
42 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
43 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
44 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
45 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
46 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
47 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
48 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
49 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
51 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
53 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
54 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
55 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
56 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
57 helpString += "The chimera.pintail command should be in the following format: \n";
58 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
59 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
60 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
64 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
68 //**********************************************************************************************************************
69 ChimeraPintailCommand::ChimeraPintailCommand(){
71 abort = true; calledHelp = true;
73 vector<string> tempOutNames;
74 outputTypes["chimera"] = tempOutNames;
75 outputTypes["accnos"] = tempOutNames;
78 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
82 //***************************************************************************************************************
83 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
85 abort = false; calledHelp = false;
87 //allow user to run help
88 if(option == "help") { help(); abort = true; calledHelp = true; }
91 vector<string> myArray = setParameters();
93 OptionParser parser(option);
94 map<string,string> parameters = parser.getParameters();
96 ValidParameters validParameter("chimera.pintail");
97 map<string,string>::iterator it;
99 //check to make sure all parameters are valid for command
100 for (it = parameters.begin(); it != parameters.end(); it++) {
101 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
104 vector<string> tempOutNames;
105 outputTypes["chimera"] = tempOutNames;
106 outputTypes["accnos"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("reference");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["reference"] = inputDir + it->second; }
121 it = parameters.find("conservation");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["conservation"] = inputDir + it->second; }
129 it = parameters.find("quantile");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["quantile"] = inputDir + it->second; }
139 //check for required parameters
140 fastafile = validParameter.validFile(parameters, "fasta", false);
141 if (fastafile == "not found") {
142 //if there is a current fasta file, use it
143 string filename = m->getFastaFile();
144 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
145 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
147 m->splitAtDash(fastafile, fastaFileNames);
149 //go through files and make sure they are good, if not, then disregard them
150 for (int i = 0; i < fastaFileNames.size(); i++) {
151 if (inputDir != "") {
152 string path = m->hasPath(fastaFileNames[i]);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
160 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
162 //if you can't open it, try default location
163 if (ableToOpen == 1) {
164 if (m->getDefaultPath() != "") { //default path is set
165 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
166 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
168 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
170 fastaFileNames[i] = tryPath;
174 if (ableToOpen == 1) {
175 if (m->getOutputDir() != "") { //default path is set
176 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
177 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
179 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
181 fastaFileNames[i] = tryPath;
187 if (ableToOpen == 1) {
188 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
189 //erase from file list
190 fastaFileNames.erase(fastaFileNames.begin()+i);
195 //make sure there is at least one valid file left
196 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
200 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
201 filter = m->isTrue(temp);
203 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
204 m->setProcessors(temp);
205 convert(temp, processors);
207 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
208 convert(temp, window);
210 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
211 convert(temp, increment);
213 maskfile = validParameter.validFile(parameters, "mask", false);
214 if (maskfile == "not found") { maskfile = ""; }
215 else if (maskfile != "default") {
216 if (inputDir != "") {
217 string path = m->hasPath(maskfile);
218 //if the user has not given a path then, add inputdir. else leave path alone.
219 if (path == "") { maskfile = inputDir + maskfile; }
223 int ableToOpen = m->openInputFile(maskfile, in, "no error");
224 if (ableToOpen == 1) {
225 if (m->getDefaultPath() != "") { //default path is set
226 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
227 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
229 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
235 if (ableToOpen == 1) {
236 if (m->getOutputDir() != "") { //default path is set
237 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
238 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
240 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
248 if (ableToOpen == 1) {
249 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
255 //if the user changes the output directory command factory will send this info to us in the output parameter
256 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
258 templatefile = validParameter.validFile(parameters, "reference", true);
259 if (templatefile == "not open") { abort = true; }
260 else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
262 consfile = validParameter.validFile(parameters, "conservation", true);
263 if (consfile == "not open") { abort = true; }
264 else if (consfile == "not found") {
267 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
268 ifstream FileTest(tempConsFile.c_str());
270 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
272 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
275 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
276 ifstream FileTest2(tempConsFile.c_str());
278 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
280 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
286 quanfile = validParameter.validFile(parameters, "quantile", true);
287 if (quanfile == "not open") { abort = true; }
288 else if (quanfile == "not found") { quanfile = ""; }
291 catch(exception& e) {
292 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
296 //***************************************************************************************************************
298 int ChimeraPintailCommand::execute(){
301 if (abort == true) { if (calledHelp) { return 0; } return 2; }
303 for (int s = 0; s < fastaFileNames.size(); s++) {
305 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
307 int start = time(NULL);
310 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
312 //check for quantile to save the time
313 string tempQuan = "";
314 if ((!filter) && (maskfile == "")) {
315 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
316 }else if ((!filter) && (maskfile != "")) {
317 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
318 }else if ((filter) && (maskfile != "")) {
319 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
320 }else if ((filter) && (maskfile == "")) {
321 tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
324 ifstream FileTest(tempQuan.c_str());
326 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
328 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
331 string tryPath = m->getDefaultPath();
332 string tempQuan = "";
333 if ((!filter) && (maskfile == "")) {
334 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
335 }else if ((!filter) && (maskfile != "")) {
336 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
337 }else if ((filter) && (maskfile != "")) {
338 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
339 }else if ((filter) && (maskfile == "")) {
340 tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
343 ifstream FileTest2(tempQuan.c_str());
345 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
347 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
352 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
354 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
355 string outputFileName, accnosFileName;
356 if (maskfile != "") {
357 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
358 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
360 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
361 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
364 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
366 if (chimera->getUnaligned()) {
367 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
371 templateSeqsLength = chimera->getLength();
374 int pid, numSeqsPerProcessor;
376 vector<unsigned long int> MPIPos;
379 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
380 MPI_Comm_size(MPI_COMM_WORLD, &processors);
384 MPI_File outMPIAccnos;
386 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
387 int inMode=MPI_MODE_RDONLY;
389 char outFilename[1024];
390 strcpy(outFilename, outputFileName.c_str());
392 char outAccnosFilename[1024];
393 strcpy(outAccnosFilename, accnosFileName.c_str());
395 char inFileName[1024];
396 strcpy(inFileName, fastaFileNames[s].c_str());
398 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
399 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
400 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
402 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
404 if (pid == 0) { //you are the root process
406 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
408 //send file positions to all processes
409 for(int i = 1; i < processors; i++) {
410 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
411 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
414 //figure out how many sequences you have to align
415 numSeqsPerProcessor = numSeqs / processors;
416 int startIndex = pid * numSeqsPerProcessor;
417 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
420 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
422 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
424 }else{ //you are a child process
425 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
426 MPIPos.resize(numSeqs+1);
427 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
429 //figure out how many sequences you have to align
430 numSeqsPerProcessor = numSeqs / processors;
431 int startIndex = pid * numSeqsPerProcessor;
432 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
435 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
437 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
441 MPI_File_close(&inMPI);
442 MPI_File_close(&outMPI);
443 MPI_File_close(&outMPIAccnos);
444 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
446 vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
448 for (int i = 0; i < (positions.size()-1); i++) {
449 lines.push_back(new linePair(positions[i], positions[(i+1)]));
453 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
456 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
458 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
461 processIDS.resize(0);
463 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
465 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
466 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
468 //append output files
469 for(int i=1;i<processors;i++){
470 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
471 remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
474 //append output files
475 for(int i=1;i<processors;i++){
476 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
477 remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
480 if (m->control_pressed) {
481 remove(outputFileName.c_str());
482 remove(accnosFileName.c_str());
483 for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
484 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
491 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
493 if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
499 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
501 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
502 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
504 m->mothurOutEndLine();
505 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
508 //set accnos file as new current accnosfile
510 itTypes = outputTypes.find("accnos");
511 if (itTypes != outputTypes.end()) {
512 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
515 m->mothurOutEndLine();
516 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
517 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
518 m->mothurOutEndLine();
523 catch(exception& e) {
524 m->errorOut(e, "ChimeraPintailCommand", "execute");
528 //**********************************************************************************************************************
530 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
533 m->openOutputFile(outputFName, out);
536 m->openOutputFile(accnos, out2);
539 m->openInputFile(filename, inFASTA);
541 inFASTA.seekg(filePos->start);
548 if (m->control_pressed) { return 1; }
550 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
552 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
554 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
555 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
558 chimera->getChimeras(candidateSeq);
560 if (m->control_pressed) { delete candidateSeq; return 1; }
563 chimera->print(out, out2);
569 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
570 unsigned long int pos = inFASTA.tellg();
571 if ((pos == -1) || (pos >= filePos->end)) { break; }
573 if (inFASTA.eof()) { break; }
577 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
580 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
588 catch(exception& e) {
589 m->errorOut(e, "ChimeraPintailCommand", "driver");
593 //**********************************************************************************************************************
595 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
600 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
602 for(int i=0;i<num;i++){
604 if (m->control_pressed) { return 1; }
607 int length = MPIPos[start+i+1] - MPIPos[start+i];
609 char* buf4 = new char[length];
610 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
612 string tempBuf = buf4;
613 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
614 istringstream iss (tempBuf,istringstream::in);
617 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
619 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
621 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
622 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
625 chimera->getChimeras(candidateSeq);
627 if (m->control_pressed) { delete candidateSeq; return 1; }
630 chimera->print(outMPI, outAccMPI);
636 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
639 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
644 catch(exception& e) {
645 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
651 /**************************************************************************************************/
653 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
655 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
659 //loop through and create all the processes you want
660 while (process != processors) {
664 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
667 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
669 //pass numSeqs to parent
671 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
672 m->openOutputFile(tempFile, out);
678 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
679 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
684 //force parent to wait until all the processes are done
685 for (int i=0;i<processors;i++) {
686 int temp = processIDS[i];
690 for (int i = 0; i < processIDS.size(); i++) {
692 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
693 m->openInputFile(tempFile, in);
694 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
695 in.close(); remove(tempFile.c_str());
701 catch(exception& e) {
702 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
707 /**************************************************************************************************/