2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){
17 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
55 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60 helpString += "The chimera.pintail command should be in the following format: \n";
61 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
67 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
71 //**********************************************************************************************************************
72 ChimeraPintailCommand::ChimeraPintailCommand(){
74 abort = true; calledHelp = true;
76 vector<string> tempOutNames;
77 outputTypes["chimera"] = tempOutNames;
78 outputTypes["accnos"] = tempOutNames;
81 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
85 //***************************************************************************************************************
86 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
88 abort = false; calledHelp = false;
89 rdb = ReferenceDB::getInstance();
91 //allow user to run help
92 if(option == "help") { help(); abort = true; calledHelp = true; }
93 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
96 vector<string> myArray = setParameters();
98 OptionParser parser(option);
99 map<string,string> parameters = parser.getParameters();
101 ValidParameters validParameter("chimera.pintail");
102 map<string,string>::iterator it;
104 //check to make sure all parameters are valid for command
105 for (it = parameters.begin(); it != parameters.end(); it++) {
106 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
109 vector<string> tempOutNames;
110 outputTypes["chimera"] = tempOutNames;
111 outputTypes["accnos"] = tempOutNames;
113 //if the user changes the input directory command factory will send this info to us in the output parameter
114 inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
118 it = parameters.find("reference");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["reference"] = inputDir + it->second; }
126 it = parameters.find("conservation");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["conservation"] = inputDir + it->second; }
134 it = parameters.find("quantile");
135 //user has given a template file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["quantile"] = inputDir + it->second; }
144 //check for required parameters
145 fastafile = validParameter.validFile(parameters, "fasta", false);
146 if (fastafile == "not found") {
147 //if there is a current fasta file, use it
148 string filename = m->getFastaFile();
149 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
150 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
152 m->splitAtDash(fastafile, fastaFileNames);
154 //go through files and make sure they are good, if not, then disregard them
155 for (int i = 0; i < fastaFileNames.size(); i++) {
158 if (fastaFileNames[i] == "current") {
159 fastaFileNames[i] = m->getFastaFile();
160 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
162 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
163 //erase from file list
164 fastaFileNames.erase(fastaFileNames.begin()+i);
171 if (inputDir != "") {
172 string path = m->hasPath(fastaFileNames[i]);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
180 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
182 //if you can't open it, try default location
183 if (ableToOpen == 1) {
184 if (m->getDefaultPath() != "") { //default path is set
185 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
186 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
188 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
190 fastaFileNames[i] = tryPath;
194 if (ableToOpen == 1) {
195 if (m->getOutputDir() != "") { //default path is set
196 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
197 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
199 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
201 fastaFileNames[i] = tryPath;
207 if (ableToOpen == 1) {
208 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
209 //erase from file list
210 fastaFileNames.erase(fastaFileNames.begin()+i);
213 m->setFastaFile(fastaFileNames[i]);
218 //make sure there is at least one valid file left
219 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
223 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
224 filter = m->isTrue(temp);
226 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
227 m->setProcessors(temp);
228 m->mothurConvert(temp, processors);
230 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
231 m->mothurConvert(temp, window);
233 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
234 m->mothurConvert(temp, increment);
236 temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
237 save = m->isTrue(temp);
239 if (save) { //clear out old references
243 //this has to go after save so that if the user sets save=t and provides no reference we abort
244 templatefile = validParameter.validFile(parameters, "reference", true);
245 if (templatefile == "not found") {
246 //check for saved reference sequences
247 if (rdb->referenceSeqs.size() != 0) {
248 templatefile = "saved";
250 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
251 m->mothurOutEndLine();
254 }else if (templatefile == "not open") { abort = true; }
255 else { if (save) { rdb->setSavedReference(templatefile); } }
258 maskfile = validParameter.validFile(parameters, "mask", false);
259 if (maskfile == "not found") { maskfile = ""; }
260 else if (maskfile != "default") {
261 if (inputDir != "") {
262 string path = m->hasPath(maskfile);
263 //if the user has not given a path then, add inputdir. else leave path alone.
264 if (path == "") { maskfile = inputDir + maskfile; }
268 int ableToOpen = m->openInputFile(maskfile, in, "no error");
269 if (ableToOpen == 1) {
270 if (m->getDefaultPath() != "") { //default path is set
271 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
272 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
274 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
280 if (ableToOpen == 1) {
281 if (m->getOutputDir() != "") { //default path is set
282 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
283 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
285 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
293 if (ableToOpen == 1) {
294 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
300 //if the user changes the output directory command factory will send this info to us in the output parameter
301 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
303 consfile = validParameter.validFile(parameters, "conservation", true);
304 if (consfile == "not open") { abort = true; }
305 else if (consfile == "not found") {
308 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
309 ifstream FileTest(tempConsFile.c_str());
311 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
313 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
316 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
317 ifstream FileTest2(tempConsFile.c_str());
319 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
321 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
327 quanfile = validParameter.validFile(parameters, "quantile", true);
328 if (quanfile == "not open") { abort = true; }
329 else if (quanfile == "not found") { quanfile = ""; }
332 catch(exception& e) {
333 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
337 //***************************************************************************************************************
339 int ChimeraPintailCommand::execute(){
342 if (abort == true) { if (calledHelp) { return 0; } return 2; }
344 for (int s = 0; s < fastaFileNames.size(); s++) {
346 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
348 int start = time(NULL);
351 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
353 //check for quantile to save the time
354 string baseName = templatefile;
355 if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
357 string tempQuan = "";
358 if ((!filter) && (maskfile == "")) {
359 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
360 }else if ((!filter) && (maskfile != "")) {
361 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
362 }else if ((filter) && (maskfile != "")) {
363 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
364 }else if ((filter) && (maskfile == "")) {
365 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
368 ifstream FileTest(tempQuan.c_str());
370 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
372 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
375 string tryPath = m->getDefaultPath();
376 string tempQuan = "";
377 if ((!filter) && (maskfile == "")) {
378 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
379 }else if ((!filter) && (maskfile != "")) {
380 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
381 }else if ((filter) && (maskfile != "")) {
382 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
383 }else if ((filter) && (maskfile == "")) {
384 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
387 ifstream FileTest2(tempQuan.c_str());
389 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
391 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
395 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
397 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
398 string outputFileName, accnosFileName;
399 if (maskfile != "") {
400 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
401 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
403 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
404 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
407 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
409 if (chimera->getUnaligned()) {
410 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
414 templateSeqsLength = chimera->getLength();
417 int pid, numSeqsPerProcessor;
419 vector<unsigned long long> MPIPos;
422 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
423 MPI_Comm_size(MPI_COMM_WORLD, &processors);
427 MPI_File outMPIAccnos;
429 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
430 int inMode=MPI_MODE_RDONLY;
432 char outFilename[1024];
433 strcpy(outFilename, outputFileName.c_str());
435 char outAccnosFilename[1024];
436 strcpy(outAccnosFilename, accnosFileName.c_str());
438 char inFileName[1024];
439 strcpy(inFileName, fastaFileNames[s].c_str());
441 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
442 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
443 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
445 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
447 if (pid == 0) { //you are the root process
449 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
451 //send file positions to all processes
452 for(int i = 1; i < processors; i++) {
453 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
454 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
457 //figure out how many sequences you have to align
458 numSeqsPerProcessor = numSeqs / processors;
459 int startIndex = pid * numSeqsPerProcessor;
460 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
463 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
465 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
467 }else{ //you are a child process
468 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
469 MPIPos.resize(numSeqs+1);
470 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
472 //figure out how many sequences you have to align
473 numSeqsPerProcessor = numSeqs / processors;
474 int startIndex = pid * numSeqsPerProcessor;
475 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
478 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
480 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
484 MPI_File_close(&inMPI);
485 MPI_File_close(&outMPI);
486 MPI_File_close(&outMPIAccnos);
487 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
491 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
492 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
494 for (int i = 0; i < (positions.size()-1); i++) {
495 lines.push_back(new linePair(positions[i], positions[(i+1)]));
500 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
502 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
505 processIDS.resize(0);
507 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
509 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
510 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
512 //append output files
513 for(int i=1;i<processors;i++){
514 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
515 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
518 //append output files
519 for(int i=1;i<processors;i++){
520 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
521 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
524 if (m->control_pressed) {
525 m->mothurRemove(outputFileName);
526 m->mothurRemove(accnosFileName);
527 for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
528 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
535 lines.push_back(new linePair(0, 1000));
536 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
538 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
544 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
546 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
547 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
549 m->mothurOutEndLine();
550 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
553 //set accnos file as new current accnosfile
555 itTypes = outputTypes.find("accnos");
556 if (itTypes != outputTypes.end()) {
557 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
560 m->mothurOutEndLine();
561 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
562 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
563 m->mothurOutEndLine();
568 catch(exception& e) {
569 m->errorOut(e, "ChimeraPintailCommand", "execute");
573 //**********************************************************************************************************************
575 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
578 m->openOutputFile(outputFName, out);
581 m->openOutputFile(accnos, out2);
584 m->openInputFile(filename, inFASTA);
586 inFASTA.seekg(filePos->start);
593 if (m->control_pressed) { return 1; }
595 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
597 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
599 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
600 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
603 chimera->getChimeras(candidateSeq);
605 if (m->control_pressed) { delete candidateSeq; return 1; }
608 chimera->print(out, out2);
614 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
615 unsigned long long pos = inFASTA.tellg();
616 if ((pos == -1) || (pos >= filePos->end)) { break; }
618 if (inFASTA.eof()) { break; }
622 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
625 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
633 catch(exception& e) {
634 m->errorOut(e, "ChimeraPintailCommand", "driver");
638 //**********************************************************************************************************************
640 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
645 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
647 for(int i=0;i<num;i++){
649 if (m->control_pressed) { return 1; }
652 int length = MPIPos[start+i+1] - MPIPos[start+i];
654 char* buf4 = new char[length];
655 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
657 string tempBuf = buf4;
658 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
659 istringstream iss (tempBuf,istringstream::in);
662 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
664 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
666 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
667 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
670 chimera->getChimeras(candidateSeq);
672 if (m->control_pressed) { delete candidateSeq; return 1; }
675 chimera->print(outMPI, outAccMPI);
681 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
684 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
689 catch(exception& e) {
690 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
696 /**************************************************************************************************/
698 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
700 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
704 //loop through and create all the processes you want
705 while (process != processors) {
709 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
712 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
714 //pass numSeqs to parent
716 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
717 m->openOutputFile(tempFile, out);
723 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
724 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
729 //force parent to wait until all the processes are done
730 for (int i=0;i<processors;i++) {
731 int temp = processIDS[i];
735 for (int i = 0; i < processIDS.size(); i++) {
737 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
738 m->openInputFile(tempFile, in);
739 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
740 in.close(); m->mothurRemove(tempFile);
746 catch(exception& e) {
747 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
752 /**************************************************************************************************/