2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){
17 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
19 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
20 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
21 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
22 CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
23 CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
24 CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
28 CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
55 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60 helpString += "The chimera.pintail command should be in the following format: \n";
61 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
67 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
71 //**********************************************************************************************************************
72 string ChimeraPintailCommand::getOutputFileNameTag(string type, string inputName=""){
74 string outputFileName = "";
75 map<string, vector<string> >::iterator it;
77 //is this a type this command creates
78 it = outputTypes.find(type);
79 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
81 if (type == "chimera") { outputFileName = "pintail.chimeras"; }
82 else if (type == "accnos") { outputFileName = "pintail.accnos"; }
83 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
85 return outputFileName;
88 m->errorOut(e, "ChimeraPintailCommand", "getOutputFileNameTag");
92 //**********************************************************************************************************************
93 ChimeraPintailCommand::ChimeraPintailCommand(){
95 abort = true; calledHelp = true;
97 vector<string> tempOutNames;
98 outputTypes["chimera"] = tempOutNames;
99 outputTypes["accnos"] = tempOutNames;
101 catch(exception& e) {
102 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
106 //***************************************************************************************************************
107 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
109 abort = false; calledHelp = false;
110 rdb = ReferenceDB::getInstance();
112 //allow user to run help
113 if(option == "help") { help(); abort = true; calledHelp = true; }
114 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter("chimera.pintail");
123 map<string,string>::iterator it;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
130 vector<string> tempOutNames;
131 outputTypes["chimera"] = tempOutNames;
132 outputTypes["accnos"] = tempOutNames;
134 //if the user changes the input directory command factory will send this info to us in the output parameter
135 inputDir = validParameter.validFile(parameters, "inputdir", false);
136 if (inputDir == "not found"){ inputDir = ""; }
139 it = parameters.find("reference");
140 //user has given a template file
141 if(it != parameters.end()){
142 path = m->hasPath(it->second);
143 //if the user has not given a path then, add inputdir. else leave path alone.
144 if (path == "") { parameters["reference"] = inputDir + it->second; }
147 it = parameters.find("conservation");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["conservation"] = inputDir + it->second; }
155 it = parameters.find("quantile");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["quantile"] = inputDir + it->second; }
165 //check for required parameters
166 fastafile = validParameter.validFile(parameters, "fasta", false);
167 if (fastafile == "not found") {
168 //if there is a current fasta file, use it
169 string filename = m->getFastaFile();
170 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
171 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
173 m->splitAtDash(fastafile, fastaFileNames);
175 //go through files and make sure they are good, if not, then disregard them
176 for (int i = 0; i < fastaFileNames.size(); i++) {
179 if (fastaFileNames[i] == "current") {
180 fastaFileNames[i] = m->getFastaFile();
181 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
183 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
184 //erase from file list
185 fastaFileNames.erase(fastaFileNames.begin()+i);
192 if (inputDir != "") {
193 string path = m->hasPath(fastaFileNames[i]);
194 //if the user has not given a path then, add inputdir. else leave path alone.
195 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
201 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
203 //if you can't open it, try default location
204 if (ableToOpen == 1) {
205 if (m->getDefaultPath() != "") { //default path is set
206 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
207 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
209 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
211 fastaFileNames[i] = tryPath;
215 if (ableToOpen == 1) {
216 if (m->getOutputDir() != "") { //default path is set
217 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
218 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
220 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
222 fastaFileNames[i] = tryPath;
228 if (ableToOpen == 1) {
229 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
230 //erase from file list
231 fastaFileNames.erase(fastaFileNames.begin()+i);
234 m->setFastaFile(fastaFileNames[i]);
239 //make sure there is at least one valid file left
240 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
244 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
245 filter = m->isTrue(temp);
247 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
248 m->setProcessors(temp);
249 m->mothurConvert(temp, processors);
251 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
252 m->mothurConvert(temp, window);
254 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
255 m->mothurConvert(temp, increment);
257 temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
258 save = m->isTrue(temp);
260 if (save) { //clear out old references
264 //this has to go after save so that if the user sets save=t and provides no reference we abort
265 templatefile = validParameter.validFile(parameters, "reference", true);
266 if (templatefile == "not found") {
267 //check for saved reference sequences
268 if (rdb->referenceSeqs.size() != 0) {
269 templatefile = "saved";
271 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
272 m->mothurOutEndLine();
275 }else if (templatefile == "not open") { abort = true; }
276 else { if (save) { rdb->setSavedReference(templatefile); } }
279 maskfile = validParameter.validFile(parameters, "mask", false);
280 if (maskfile == "not found") { maskfile = ""; }
281 else if (maskfile != "default") {
282 if (inputDir != "") {
283 string path = m->hasPath(maskfile);
284 //if the user has not given a path then, add inputdir. else leave path alone.
285 if (path == "") { maskfile = inputDir + maskfile; }
289 int ableToOpen = m->openInputFile(maskfile, in, "no error");
290 if (ableToOpen == 1) {
291 if (m->getDefaultPath() != "") { //default path is set
292 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
293 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
295 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
301 if (ableToOpen == 1) {
302 if (m->getOutputDir() != "") { //default path is set
303 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
304 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
306 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
314 if (ableToOpen == 1) {
315 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
321 //if the user changes the output directory command factory will send this info to us in the output parameter
322 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
324 consfile = validParameter.validFile(parameters, "conservation", true);
325 if (consfile == "not open") { abort = true; }
326 else if (consfile == "not found") {
329 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
330 ifstream FileTest(tempConsFile.c_str());
332 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
334 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
337 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
338 ifstream FileTest2(tempConsFile.c_str());
340 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
342 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
348 quanfile = validParameter.validFile(parameters, "quantile", true);
349 if (quanfile == "not open") { abort = true; }
350 else if (quanfile == "not found") { quanfile = ""; }
353 catch(exception& e) {
354 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
358 //***************************************************************************************************************
360 int ChimeraPintailCommand::execute(){
363 if (abort == true) { if (calledHelp) { return 0; } return 2; }
365 for (int s = 0; s < fastaFileNames.size(); s++) {
367 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
369 int start = time(NULL);
372 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
374 //check for quantile to save the time
375 string baseName = templatefile;
376 if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
378 string tempQuan = "";
379 if ((!filter) && (maskfile == "")) {
380 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
381 }else if ((!filter) && (maskfile != "")) {
382 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
383 }else if ((filter) && (maskfile != "")) {
384 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
385 }else if ((filter) && (maskfile == "")) {
386 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
389 ifstream FileTest(tempQuan.c_str());
391 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
393 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
396 string tryPath = m->getDefaultPath();
397 string tempQuan = "";
398 if ((!filter) && (maskfile == "")) {
399 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
400 }else if ((!filter) && (maskfile != "")) {
401 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
402 }else if ((filter) && (maskfile != "")) {
403 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
404 }else if ((filter) && (maskfile == "")) {
405 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
408 ifstream FileTest2(tempQuan.c_str());
410 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
412 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
416 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
418 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
419 string outputFileName, accnosFileName;
420 if (maskfile != "") {
421 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("chimera");
422 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + getOutputFileNameTag("accnos");
424 outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("chimera");
425 accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + getOutputFileNameTag("accnos");
428 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
430 if (chimera->getUnaligned()) {
431 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
435 templateSeqsLength = chimera->getLength();
438 int pid, numSeqsPerProcessor;
440 vector<unsigned long long> MPIPos;
443 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
444 MPI_Comm_size(MPI_COMM_WORLD, &processors);
448 MPI_File outMPIAccnos;
450 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
451 int inMode=MPI_MODE_RDONLY;
453 char outFilename[1024];
454 strcpy(outFilename, outputFileName.c_str());
456 char outAccnosFilename[1024];
457 strcpy(outAccnosFilename, accnosFileName.c_str());
459 char inFileName[1024];
460 strcpy(inFileName, fastaFileNames[s].c_str());
462 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
463 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
464 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
466 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
468 if (pid == 0) { //you are the root process
470 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
472 //send file positions to all processes
473 for(int i = 1; i < processors; i++) {
474 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
475 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
478 //figure out how many sequences you have to align
479 numSeqsPerProcessor = numSeqs / processors;
480 int startIndex = pid * numSeqsPerProcessor;
481 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
484 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
486 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
488 }else{ //you are a child process
489 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
490 MPIPos.resize(numSeqs+1);
491 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
493 //figure out how many sequences you have to align
494 numSeqsPerProcessor = numSeqs / processors;
495 int startIndex = pid * numSeqsPerProcessor;
496 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
499 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
501 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
505 MPI_File_close(&inMPI);
506 MPI_File_close(&outMPI);
507 MPI_File_close(&outMPIAccnos);
508 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
512 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
513 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
515 for (int i = 0; i < (positions.size()-1); i++) {
516 lines.push_back(new linePair(positions[i], positions[(i+1)]));
521 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
523 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
526 processIDS.resize(0);
528 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
530 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
531 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
533 //append output files
534 for(int i=1;i<processors;i++){
535 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
536 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
539 //append output files
540 for(int i=1;i<processors;i++){
541 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
542 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
545 if (m->control_pressed) {
546 m->mothurRemove(outputFileName);
547 m->mothurRemove(accnosFileName);
548 for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
549 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
556 lines.push_back(new linePair(0, 1000));
557 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
559 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
565 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
567 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
568 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
570 m->mothurOutEndLine();
571 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
574 //set accnos file as new current accnosfile
576 itTypes = outputTypes.find("accnos");
577 if (itTypes != outputTypes.end()) {
578 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
581 m->mothurOutEndLine();
582 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
583 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
584 m->mothurOutEndLine();
589 catch(exception& e) {
590 m->errorOut(e, "ChimeraPintailCommand", "execute");
594 //**********************************************************************************************************************
596 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
599 m->openOutputFile(outputFName, out);
602 m->openOutputFile(accnos, out2);
605 m->openInputFile(filename, inFASTA);
607 inFASTA.seekg(filePos->start);
614 if (m->control_pressed) { return 1; }
616 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
618 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
620 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
621 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
624 chimera->getChimeras(candidateSeq);
626 if (m->control_pressed) { delete candidateSeq; return 1; }
629 chimera->print(out, out2);
635 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
636 unsigned long long pos = inFASTA.tellg();
637 if ((pos == -1) || (pos >= filePos->end)) { break; }
639 if (inFASTA.eof()) { break; }
643 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
646 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
654 catch(exception& e) {
655 m->errorOut(e, "ChimeraPintailCommand", "driver");
659 //**********************************************************************************************************************
661 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
666 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
668 for(int i=0;i<num;i++){
670 if (m->control_pressed) { return 1; }
673 int length = MPIPos[start+i+1] - MPIPos[start+i];
675 char* buf4 = new char[length];
676 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
678 string tempBuf = buf4;
679 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
680 istringstream iss (tempBuf,istringstream::in);
683 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
685 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
687 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
688 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
691 chimera->getChimeras(candidateSeq);
693 if (m->control_pressed) { delete candidateSeq; return 1; }
696 chimera->print(outMPI, outAccMPI);
702 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
705 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
710 catch(exception& e) {
711 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
717 /**************************************************************************************************/
719 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
721 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
725 //loop through and create all the processes you want
726 while (process != processors) {
730 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
733 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
735 //pass numSeqs to parent
737 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
738 m->openOutputFile(tempFile, out);
744 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
745 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
750 //force parent to wait until all the processes are done
751 for (int i=0;i<processors;i++) {
752 int temp = processIDS[i];
756 for (int i = 0; i < processIDS.size(); i++) {
758 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
759 m->openInputFile(tempFile, in);
760 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
761 in.close(); m->mothurRemove(tempFile);
767 catch(exception& e) {
768 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
773 /**************************************************************************************************/