2 * chimerapintailcommand.cpp
5 * Created by westcott on 4/1/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "chimerapintailcommand.h"
14 //**********************************************************************************************************************
15 vector<string> ChimeraPintailCommand::setParameters(){
17 CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
18 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
19 CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pconservation);
20 CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pquantile);
21 CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
22 CommandParameter pwindow("window", "Number", "", "0", "", "", "","","",false,false); parameters.push_back(pwindow);
23 CommandParameter pincrement("increment", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pincrement);
24 CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
25 CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
26 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
27 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
35 m->errorOut(e, "ChimeraPintailCommand", "setParameters");
39 //**********************************************************************************************************************
40 string ChimeraPintailCommand::getHelpString(){
42 string helpString = "";
43 helpString += "The chimera.pintail command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
44 helpString += "This command was created using the algorythms described in the 'At Least 1 in 20 16S rRNA Sequence Records Currently Held in the Public Repositories is Estimated To Contain Substantial Anomalies' paper by Kevin E. Ashelford 1, Nadia A. Chuzhanova 3, John C. Fry 1, Antonia J. Jones 2 and Andrew J. Weightman 1.\n";
45 helpString += "The chimera.pintail command parameters are fasta, reference, filter, mask, processors, window, increment, conservation and quantile.\n";
46 helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
47 helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
48 helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. \n";
49 helpString += "The filter parameter allows you to specify if you would like to apply a vertical and 50% soft filter. \n";
50 helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences, by default no mask is applied. You can apply an ecoli mask by typing, mask=default. \n";
51 helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
53 helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
55 helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
56 helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
57 helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
58 helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
59 helpString += "The quantile parameter allows you to enter a file containing quantiles for a template files sequences, if you use the filter the quantile file generated becomes unique to the fasta file you used.\n";
60 helpString += "The chimera.pintail command should be in the following format: \n";
61 helpString += "chimera.pintail(fasta=yourFastaFile, reference=yourTemplate) \n";
62 helpString += "Example: chimera.pintail(fasta=AD.align, reference=silva.bacteria.fasta) \n";
63 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFastaFile).\n";
67 m->errorOut(e, "ChimeraPintailCommand", "getHelpString");
71 //**********************************************************************************************************************
72 string ChimeraPintailCommand::getOutputPattern(string type) {
76 if (type == "chimera") { pattern = "[filename],[tag],pintail.chimeras-[filename],pintail.chimeras"; }
77 else if (type == "accnos") { pattern = "[filename],[tag],pintail.accnos-[filename],pintail.accnos"; }
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
83 m->errorOut(e, "ChimeraPintailCommand", "getOutputPattern");
87 //**********************************************************************************************************************
88 ChimeraPintailCommand::ChimeraPintailCommand(){
90 abort = true; calledHelp = true;
92 vector<string> tempOutNames;
93 outputTypes["chimera"] = tempOutNames;
94 outputTypes["accnos"] = tempOutNames;
98 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
102 //***************************************************************************************************************
103 ChimeraPintailCommand::ChimeraPintailCommand(string option) {
105 abort = false; calledHelp = false;
106 rdb = ReferenceDB::getInstance();
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
118 ValidParameters validParameter("chimera.pintail");
119 map<string,string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 vector<string> tempOutNames;
127 outputTypes["chimera"] = tempOutNames;
128 outputTypes["accnos"] = tempOutNames;
131 //if the user changes the input directory command factory will send this info to us in the output parameter
132 inputDir = validParameter.validFile(parameters, "inputdir", false);
133 if (inputDir == "not found"){ inputDir = ""; }
136 it = parameters.find("reference");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["reference"] = inputDir + it->second; }
144 it = parameters.find("conservation");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["conservation"] = inputDir + it->second; }
152 it = parameters.find("quantile");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["quantile"] = inputDir + it->second; }
162 //check for required parameters
163 fastafile = validParameter.validFile(parameters, "fasta", false);
164 if (fastafile == "not found") {
165 //if there is a current fasta file, use it
166 string filename = m->getFastaFile();
167 if (filename != "") { fastaFileNames.push_back(filename); m->mothurOut("Using " + filename + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
168 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
170 m->splitAtDash(fastafile, fastaFileNames);
172 //go through files and make sure they are good, if not, then disregard them
173 for (int i = 0; i < fastaFileNames.size(); i++) {
176 if (fastaFileNames[i] == "current") {
177 fastaFileNames[i] = m->getFastaFile();
178 if (fastaFileNames[i] != "") { m->mothurOut("Using " + fastaFileNames[i] + " as input file for the fasta parameter where you had given current."); m->mothurOutEndLine(); }
180 m->mothurOut("You have no current fastafile, ignoring current."); m->mothurOutEndLine(); ignore=true;
181 //erase from file list
182 fastaFileNames.erase(fastaFileNames.begin()+i);
189 if (inputDir != "") {
190 string path = m->hasPath(fastaFileNames[i]);
191 //if the user has not given a path then, add inputdir. else leave path alone.
192 if (path == "") { fastaFileNames[i] = inputDir + fastaFileNames[i]; }
198 ableToOpen = m->openInputFile(fastaFileNames[i], in, "noerror");
200 //if you can't open it, try default location
201 if (ableToOpen == 1) {
202 if (m->getDefaultPath() != "") { //default path is set
203 string tryPath = m->getDefaultPath() + m->getSimpleName(fastaFileNames[i]);
204 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
206 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
208 fastaFileNames[i] = tryPath;
212 if (ableToOpen == 1) {
213 if (m->getOutputDir() != "") { //default path is set
214 string tryPath = m->getOutputDir() + m->getSimpleName(fastaFileNames[i]);
215 m->mothurOut("Unable to open " + fastaFileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
217 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
219 fastaFileNames[i] = tryPath;
225 if (ableToOpen == 1) {
226 m->mothurOut("Unable to open " + fastaFileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
227 //erase from file list
228 fastaFileNames.erase(fastaFileNames.begin()+i);
231 m->setFastaFile(fastaFileNames[i]);
236 //make sure there is at least one valid file left
237 if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
241 temp = validParameter.validFile(parameters, "filter", false); if (temp == "not found") { temp = "F"; }
242 filter = m->isTrue(temp);
244 temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
245 m->setProcessors(temp);
246 m->mothurConvert(temp, processors);
248 temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
249 m->mothurConvert(temp, window);
251 temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
252 m->mothurConvert(temp, increment);
254 temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
255 save = m->isTrue(temp);
257 if (save) { //clear out old references
261 //this has to go after save so that if the user sets save=t and provides no reference we abort
262 templatefile = validParameter.validFile(parameters, "reference", true);
263 if (templatefile == "not found") {
264 //check for saved reference sequences
265 if (rdb->referenceSeqs.size() != 0) {
266 templatefile = "saved";
268 m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
269 m->mothurOutEndLine();
272 }else if (templatefile == "not open") { abort = true; }
273 else { if (save) { rdb->setSavedReference(templatefile); } }
276 maskfile = validParameter.validFile(parameters, "mask", false);
277 if (maskfile == "not found") { maskfile = ""; }
278 else if (maskfile != "default") {
279 if (inputDir != "") {
280 string path = m->hasPath(maskfile);
281 //if the user has not given a path then, add inputdir. else leave path alone.
282 if (path == "") { maskfile = inputDir + maskfile; }
286 int ableToOpen = m->openInputFile(maskfile, in, "no error");
287 if (ableToOpen == 1) {
288 if (m->getDefaultPath() != "") { //default path is set
289 string tryPath = m->getDefaultPath() + m->getSimpleName(maskfile);
290 m->mothurOut("Unable to open " + maskfile + ". Trying default " + tryPath); m->mothurOutEndLine();
292 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
298 if (ableToOpen == 1) {
299 if (m->getOutputDir() != "") { //default path is set
300 string tryPath = m->getOutputDir() + m->getSimpleName(maskfile);
301 m->mothurOut("Unable to open " + maskfile + ". Trying output directory " + tryPath); m->mothurOutEndLine();
303 ableToOpen = m->openInputFile(tryPath, in2, "noerror");
311 if (ableToOpen == 1) {
312 m->mothurOut("Unable to open " + maskfile + "."); m->mothurOutEndLine();
318 //if the user changes the output directory command factory will send this info to us in the output parameter
319 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
321 consfile = validParameter.validFile(parameters, "conservation", true);
322 if (consfile == "not open") { abort = true; }
323 else if (consfile == "not found") {
326 string tempConsFile = m->getRootName(inputDir + m->getSimpleName(templatefile)) + "freq";
327 ifstream FileTest(tempConsFile.c_str());
329 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
331 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest.close();
334 string tempConsFile = m->getDefaultPath() + m->getRootName(m->getSimpleName(templatefile)) + "freq";
335 ifstream FileTest2(tempConsFile.c_str());
337 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
339 m->mothurOut("I found " + tempConsFile + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); consfile = tempConsFile; FileTest2.close();
345 quanfile = validParameter.validFile(parameters, "quantile", true);
346 if (quanfile == "not open") { abort = true; }
347 else if (quanfile == "not found") { quanfile = ""; }
350 catch(exception& e) {
351 m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
355 //***************************************************************************************************************
357 int ChimeraPintailCommand::execute(){
360 if (abort == true) { if (calledHelp) { return 0; } return 2; }
362 for (int s = 0; s < fastaFileNames.size(); s++) {
364 m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
366 int start = time(NULL);
369 if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
371 //check for quantile to save the time
372 string baseName = templatefile;
373 if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
375 string tempQuan = "";
376 if ((!filter) && (maskfile == "")) {
377 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
378 }else if ((!filter) && (maskfile != "")) {
379 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
380 }else if ((filter) && (maskfile != "")) {
381 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
382 }else if ((filter) && (maskfile == "")) {
383 tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
386 ifstream FileTest(tempQuan.c_str());
388 bool GoodFile = m->checkReleaseVersion(FileTest, m->getVersion());
390 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest.close();
393 string tryPath = m->getDefaultPath();
394 string tempQuan = "";
395 if ((!filter) && (maskfile == "")) {
396 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
397 }else if ((!filter) && (maskfile != "")) {
398 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
399 }else if ((filter) && (maskfile != "")) {
400 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
401 }else if ((filter) && (maskfile == "")) {
402 tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
405 ifstream FileTest2(tempQuan.c_str());
407 bool GoodFile = m->checkReleaseVersion(FileTest2, m->getVersion());
409 m->mothurOut("I found " + tempQuan + " in your input file directory. I will use it to save time."); m->mothurOutEndLine(); quanfile = tempQuan; FileTest2.close();
413 chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
415 if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
416 string outputFileName, accnosFileName;
417 map<string, string> variables;
418 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
419 if (maskfile != "") { variables["[tag]"] = m->getSimpleName(m->getRootName(maskfile)); }
420 outputFileName = getOutputFileName("chimera", variables);
421 accnosFileName = getOutputFileName("accnos", variables);
424 if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
426 if (chimera->getUnaligned()) {
427 m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
431 templateSeqsLength = chimera->getLength();
434 int pid, numSeqsPerProcessor;
436 vector<unsigned long long> MPIPos;
439 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
440 MPI_Comm_size(MPI_COMM_WORLD, &processors);
444 MPI_File outMPIAccnos;
446 int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
447 int inMode=MPI_MODE_RDONLY;
449 char outFilename[1024];
450 strcpy(outFilename, outputFileName.c_str());
452 char outAccnosFilename[1024];
453 strcpy(outAccnosFilename, accnosFileName.c_str());
455 char inFileName[1024];
456 strcpy(inFileName, fastaFileNames[s].c_str());
458 MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
459 MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
460 MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
462 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
464 if (pid == 0) { //you are the root process
466 MPIPos = m->setFilePosFasta(fastaFileNames[s], numSeqs); //fills MPIPos, returns numSeqs
468 //send file positions to all processes
469 for(int i = 1; i < processors; i++) {
470 MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
471 MPI_Send(&MPIPos[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
474 //figure out how many sequences you have to align
475 numSeqsPerProcessor = numSeqs / processors;
476 int startIndex = pid * numSeqsPerProcessor;
477 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
480 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
482 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
484 }else{ //you are a child process
485 MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
486 MPIPos.resize(numSeqs+1);
487 MPI_Recv(&MPIPos[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
489 //figure out how many sequences you have to align
490 numSeqsPerProcessor = numSeqs / processors;
491 int startIndex = pid * numSeqsPerProcessor;
492 if(pid == (processors - 1)){ numSeqsPerProcessor = numSeqs - pid * numSeqsPerProcessor; }
495 driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
497 if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
501 MPI_File_close(&inMPI);
502 MPI_File_close(&outMPI);
503 MPI_File_close(&outMPIAccnos);
504 MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
508 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
509 vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
511 for (int i = 0; i < (positions.size()-1); i++) {
512 lines.push_back(new linePair(positions[i], positions[(i+1)]));
517 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
519 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
522 processIDS.resize(0);
524 numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName);
526 rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str());
527 rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str());
529 //append output files
530 for(int i=1;i<processors;i++){
531 m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
532 m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
535 //append output files
536 for(int i=1;i<processors;i++){
537 m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
538 m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
541 if (m->control_pressed) {
542 m->mothurRemove(outputFileName);
543 m->mothurRemove(accnosFileName);
544 for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
545 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
552 lines.push_back(new linePair(0, 1000));
553 numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
555 if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
561 for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
563 outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
564 outputNames.push_back(accnosFileName); outputTypes["accnos"].push_back(accnosFileName);
566 m->mothurOutEndLine();
567 m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to check " + toString(numSeqs) + " sequences."); m->mothurOutEndLine();
570 //set accnos file as new current accnosfile
572 itTypes = outputTypes.find("accnos");
573 if (itTypes != outputTypes.end()) {
574 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
577 m->mothurOutEndLine();
578 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
579 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
580 m->mothurOutEndLine();
585 catch(exception& e) {
586 m->errorOut(e, "ChimeraPintailCommand", "execute");
590 //**********************************************************************************************************************
592 int ChimeraPintailCommand::driver(linePair* filePos, string outputFName, string filename, string accnos){
595 m->openOutputFile(outputFName, out);
598 m->openOutputFile(accnos, out2);
601 m->openInputFile(filename, inFASTA);
603 inFASTA.seekg(filePos->start);
610 if (m->control_pressed) { return 1; }
612 Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
614 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
616 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
617 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
620 chimera->getChimeras(candidateSeq);
622 if (m->control_pressed) { delete candidateSeq; return 1; }
625 chimera->print(out, out2);
631 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
632 unsigned long long pos = inFASTA.tellg();
633 if ((pos == -1) || (pos >= filePos->end)) { break; }
635 if (inFASTA.eof()) { break; }
639 if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
642 if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
650 catch(exception& e) {
651 m->errorOut(e, "ChimeraPintailCommand", "driver");
655 //**********************************************************************************************************************
657 int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
662 MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
664 for(int i=0;i<num;i++){
666 if (m->control_pressed) { return 1; }
669 int length = MPIPos[start+i+1] - MPIPos[start+i];
671 char* buf4 = new char[length];
672 MPI_File_read_at(inMPI, MPIPos[start+i], buf4, length, MPI_CHAR, &status);
674 string tempBuf = buf4;
675 if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
676 istringstream iss (tempBuf,istringstream::in);
679 Sequence* candidateSeq = new Sequence(iss); m->gobble(iss);
681 if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file
683 if (candidateSeq->getAligned().length() != templateSeqsLength) { //chimeracheck does not require seqs to be aligned
684 m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); m->mothurOutEndLine();
687 chimera->getChimeras(candidateSeq);
689 if (m->control_pressed) { delete candidateSeq; return 1; }
692 chimera->print(outMPI, outAccMPI);
698 if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
701 if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
706 catch(exception& e) {
707 m->errorOut(e, "ChimeraPintailCommand", "driverMPI");
713 /**************************************************************************************************/
715 int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
717 #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
721 //loop through and create all the processes you want
722 while (process != processors) {
726 processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
729 num = driver(lines[process], outputFileName + toString(getpid()) + ".temp", filename, accnos + toString(getpid()) + ".temp");
731 //pass numSeqs to parent
733 string tempFile = outputFileName + toString(getpid()) + ".num.temp";
734 m->openOutputFile(tempFile, out);
740 m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
741 for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
746 //force parent to wait until all the processes are done
747 for (int i=0;i<processors;i++) {
748 int temp = processIDS[i];
752 for (int i = 0; i < processIDS.size(); i++) {
754 string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
755 m->openInputFile(tempFile, in);
756 if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
757 in.close(); m->mothurRemove(tempFile);
763 catch(exception& e) {
764 m->errorOut(e, "ChimeraPintailCommand", "createProcesses");
769 /**************************************************************************************************/