2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){
17 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
55 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60 helpString += "The chimera.pintail command should be in the following format: \n";
61 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
67 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
71 //**********************************************************************************************************************
72 ChimeraPintailCommand::ChimeraPintailCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["chimera"] = tempOutNames;
78 outputTypes["accnos"] = tempOutNames;
81 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
85 //***************************************************************************************************************
86 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
88 abort = false; calledHelp = false;
89 rdb = ReferenceDB::getInstance();
91 //allow user to run help
92 if(option == "help") { help(); abort = true; calledHelp = true; }
93 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96 vector<string> myArray = setParameters();
98 OptionParser parser(option);
99 map<string,string> parameters = parser.getParameters();
101 ValidParameters validParameter("chimera.pintail");
102 map<string,string>::iterator it;
104 //check to make sure all parameters are valid for command
105 for (it = parameters.begin(); it != parameters.end(); it++) {
106 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
109 vector<string> tempOutNames;
110 outputTypes["chimera"] = tempOutNames;
111 outputTypes["accnos"] = tempOutNames;
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("reference");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["reference"] = inputDir + it->second; }
126 it = parameters.find("conservation");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["conservation"] = inputDir + it->second; }
134 it = parameters.find("quantile");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["quantile"] = inputDir + it->second; }
144 //check for required parameters
145 fastafile = validParameter.validFile(parameters, "fasta", false);
146 if (fastafile == "not found") {
147 //if there is a current fasta file, use it
148 string filename = m->getFastaFile();
149 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152 m->splitAtDash(fastafile, fastaFileNames);
154 //go through files and make sure they are good, if not, then disregard them
155 for (int i = 0; i < fastaFileNames.size(); i++) {
158 if (fastaFileNames[i] == "current") {
159 fastaFileNames[i] = m->getFastaFile();
160 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
163 //erase from file list
164 fastaFileNames.erase(fastaFileNames.begin()+i);
171 if (inputDir != "") {
172 string path = m->hasPath(fastaFileNames[i]);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
180 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182 //if you can't open it, try default location
183 if (ableToOpen == 1) {
184 if (m->getDefaultPath() != "") { //default path is set
185 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
194 if (ableToOpen == 1) {
195 if (m->getOutputDir() != "") { //default path is set
196 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201 fastaFileNames[i] = tryPath;
207 if (ableToOpen == 1) {
208 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209 //erase from file list
210 fastaFileNames.erase(fastaFileNames.begin()+i);
213 m->setFastaFile(fastaFileNames[i]);
218 //make sure there is at least one valid file left
219 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
223 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
224 filter = m->isTrue(temp);
226 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
227 m->setProcessors(temp);
228 convert(temp, processors);
230 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
231 convert(temp, window);
233 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
234 convert(temp, increment);
236 temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
237 save = m->isTrue(temp);
239 if (save) { //clear out old references
243 //this has to go after save so that if the user sets save=t and provides no reference we abort
244 templatefile = validParameter.validFile(parameters, "reference", true);
245 if (templatefile == "not found") {
246 //check for saved reference sequences
247 if (rdb->referenceSeqs.size() != 0) {
248 templatefile = "saved";
250 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
251 m->mothurOutEndLine();
254 }else if (templatefile == "not open") { abort = true; }
255 else { if (save) { rdb->setSavedReference(templatefile); } }
258 maskfile = validParameter.validFile(parameters, "mask", false);
259 if (maskfile == "not found") { maskfile = ""; }
260 else if (maskfile != "default") {
261 if (inputDir != "") {
262 string path = m->hasPath(maskfile);
263 //if the user has not given a path then, add inputdir. else leave path alone.
264 if (path == "") { maskfile = inputDir + maskfile; }
268 int ableToOpen = m->openInputFile(maskfile, in, "no error");
269 if (ableToOpen == 1) {
270 if (m->getDefaultPath() != "") { //default path is set
271 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
272 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
274 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280 if (ableToOpen == 1) {
281 if (m->getOutputDir() != "") { //default path is set
282 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
283 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
285 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
293 if (ableToOpen == 1) {
294 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
300 //if the user changes the output directory command factory will send this info to us in the output parameter
301 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
303 consfile = validParameter.validFile(parameters, "conservation", true);
304 if (consfile == "not open") { abort = true; }
305 else if (consfile == "not found") {
308 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
309 ifstream FileTest(tempConsFile.c_str());
311 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
313 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
316 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
317 ifstream FileTest2(tempConsFile.c_str());
319 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
321 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
327 quanfile = validParameter.validFile(parameters, "quantile", true);
328 if (quanfile == "not open") { abort = true; }
329 else if (quanfile == "not found") { quanfile = ""; }
332 catch(exception& e) {
333 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
337 //***************************************************************************************************************
339 int ChimeraPintailCommand::execute(){
342 if (abort == true) { if (calledHelp) { return 0; } return 2; }
344 for (int s = 0; s < fastaFileNames.size(); s++) {
346 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
348 int start = time(NULL);
351 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
353 //check for quantile to save the time
354 string baseName = templatefile;
355 if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
357 string tempQuan = "";
358 if ((!filter) && (maskfile == "")) {
359 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
360 }else if ((!filter) && (maskfile != "")) {
361 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
362 }else if ((filter) && (maskfile != "")) {
363 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
364 }else if ((filter) && (maskfile == "")) {
365 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
368 ifstream FileTest(tempQuan.c_str());
370 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
372 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
375 string tryPath = m->getDefaultPath();
376 string tempQuan = "";
377 if ((!filter) && (maskfile == "")) {
378 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
379 }else if ((!filter) && (maskfile != "")) {
380 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
381 }else if ((filter) && (maskfile != "")) {
382 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
383 }else if ((filter) && (maskfile == "")) {
384 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
387 ifstream FileTest2(tempQuan.c_str());
389 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
391 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
396 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
398 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
399 string outputFileName, accnosFileName;
400 if (maskfile != "") {
401 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
402 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
404 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
405 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
408 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
410 if (chimera->getUnaligned()) {
411 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
415 templateSeqsLength = chimera->getLength();
418 int pid, numSeqsPerProcessor;
420 vector<unsigned long int> MPIPos;
423 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
424 MPI_Comm_size(MPI_COMM_WORLD, &processors);
428 MPI_File outMPIAccnos;
430 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
431 int inMode=MPI_MODE_RDONLY;
433 char outFilename[1024];
434 strcpy(outFilename, outputFileName.c_str());
436 char outAccnosFilename[1024];
437 strcpy(outAccnosFilename, accnosFileName.c_str());
439 char inFileName[1024];
440 strcpy(inFileName, fastaFileNames[s].c_str());
442 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
443 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
444 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
446 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
448 if (pid == 0) { //you are the root process
450 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
452 //send file positions to all processes
453 for(int i = 1; i < processors; i++) {
454 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
455 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
458 //figure out how many sequences you have to align
459 numSeqsPerProcessor = numSeqs / processors;
460 int startIndex = pid * numSeqsPerProcessor;
461 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
464 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
466 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
468 }else{ //you are a child process
469 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
470 MPIPos.resize(numSeqs+1);
471 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
473 //figure out how many sequences you have to align
474 numSeqsPerProcessor = numSeqs / processors;
475 int startIndex = pid * numSeqsPerProcessor;
476 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
479 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
481 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
485 MPI_File_close(&inMPI);
486 MPI_File_close(&outMPI);
487 MPI_File_close(&outMPIAccnos);
488 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
490 vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
492 for (int i = 0; i < (positions.size()-1); i++) {
493 lines.push_back(new linePair(positions[i], positions[(i+1)]));
497 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
500 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
502 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
505 processIDS.resize(0);
507 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
509 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
510 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
512 //append output files
513 for(int i=1;i<processors;i++){
514 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
515 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
518 //append output files
519 for(int i=1;i<processors;i++){
520 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
521 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
524 if (m->control_pressed) {
525 m->mothurRemove(outputFileName);
526 m->mothurRemove(accnosFileName);
527 for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
528 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
535 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
537 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
543 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
545 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
546 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
548 m->mothurOutEndLine();
549 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
552 //set accnos file as new current accnosfile
554 itTypes = outputTypes.find("accnos");
555 if (itTypes != outputTypes.end()) {
556 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
559 m->mothurOutEndLine();
560 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
561 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
562 m->mothurOutEndLine();
567 catch(exception& e) {
568 m->errorOut(e, "ChimeraPintailCommand", "execute");
572 //**********************************************************************************************************************
574 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
577 m->openOutputFile(outputFName, out);
580 m->openOutputFile(accnos, out2);
583 m->openInputFile(filename, inFASTA);
585 inFASTA.seekg(filePos->start);
592 if (m->control_pressed) { return 1; }
594 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
596 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
598 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
599 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
602 chimera->getChimeras(candidateSeq);
604 if (m->control_pressed) { delete candidateSeq; return 1; }
607 chimera->print(out, out2);
613 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
614 unsigned long int pos = inFASTA.tellg();
615 if ((pos == -1) || (pos >= filePos->end)) { break; }
617 if (inFASTA.eof()) { break; }
621 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
624 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
632 catch(exception& e) {
633 m->errorOut(e, "ChimeraPintailCommand", "driver");
637 //**********************************************************************************************************************
639 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
644 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
646 for(int i=0;i<num;i++){
648 if (m->control_pressed) { return 1; }
651 int length = MPIPos[start+i+1] - MPIPos[start+i];
653 char* buf4 = new char[length];
654 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
656 string tempBuf = buf4;
657 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
658 istringstream iss (tempBuf,istringstream::in);
661 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
663 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
665 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
666 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
669 chimera->getChimeras(candidateSeq);
671 if (m->control_pressed) { delete candidateSeq; return 1; }
674 chimera->print(outMPI, outAccMPI);
680 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
683 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
688 catch(exception& e) {
689 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
695 /**************************************************************************************************/
697 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
699 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
703 //loop through and create all the processes you want
704 while (process != processors) {
708 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
711 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
713 //pass numSeqs to parent
715 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
716 m->openOutputFile(tempFile, out);
722 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
723 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
728 //force parent to wait until all the processes are done
729 for (int i=0;i<processors;i++) {
730 int temp = processIDS[i];
734 for (int i = 0; i < processIDS.size(); i++) {
736 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
737 m->openInputFile(tempFile, in);
738 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
739 in.close(); m->mothurRemove(tempFile);
745 catch(exception& e) {
746 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
751 /**************************************************************************************************/