#include "chimerapintailcommand.h"
#include "pintail.h"
+
//**********************************************************************************************************************
vector<string> ChimeraPintailCommand::setParameters(){
try {
- CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pconservation);
- CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pquantile);
- CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
- CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
- CommandParameter pincrement("increment", "Number", "", "25", "", "", "",false,false); parameters.push_back(pincrement);
- CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-accnos",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pconservation("conservation", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pconservation);
+ CommandParameter pquantile("quantile", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pquantile);
+ CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
+ CommandParameter pwindow("window", "Number", "", "0", "", "", "","","",false,false); parameters.push_back(pwindow);
+ CommandParameter pincrement("increment", "Number", "", "25", "", "", "","",false,false); parameters.push_back(pincrement);
+ CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
#endif
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
}
}
//**********************************************************************************************************************
+string ChimeraPintailCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "chimera") { pattern = "[filename],[tag],pintail.chimeras-[filename],pintail.chimeras"; }
+ else if (type == "accnos") { pattern = "[filename],[tag],pintail.accnos-[filename],pintail.accnos"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraPintailCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
ChimeraPintailCommand::ChimeraPintailCommand(){
try {
abort = true; calledHelp = true;
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+
}
catch(exception& e) {
m->errorOut(e, "ChimeraPintailCommand", "ChimeraPintailCommand");
ChimeraPintailCommand::ChimeraPintailCommand(string option) {
try {
abort = false; calledHelp = false;
+ rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
vector<string> tempOutNames;
outputTypes["chimera"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
inputDir = validParameter.validFile(parameters, "inputdir", false);
//erase from file list
fastaFileNames.erase(fastaFileNames.begin()+i);
i--;
+ }else {
+ m->setFastaFile(fastaFileNames[i]);
}
}
}
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
- convert(temp, window);
+ m->mothurConvert(temp, window);
temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
- convert(temp, increment);
+ m->mothurConvert(temp, increment);
+
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
maskfile = validParameter.validFile(parameters, "mask", false);
if (maskfile == "not found") { maskfile = ""; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
-
consfile = validParameter.validFile(parameters, "conservation", true);
if (consfile == "not open") { abort = true; }
else if (consfile == "not found") {
if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
//check for quantile to save the time
+ string baseName = templatefile;
+ if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
+
string tempQuan = "";
if ((!filter) && (maskfile == "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
}else if ((!filter) && (maskfile != "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
}else if ((filter) && (maskfile != "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
}else if ((filter) && (maskfile == "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
}
ifstream FileTest(tempQuan.c_str());
string tryPath = m->getDefaultPath();
string tempQuan = "";
if ((!filter) && (maskfile == "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
}else if ((!filter) && (maskfile != "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
}else if ((filter) && (maskfile != "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
}else if ((filter) && (maskfile == "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
}
ifstream FileTest2(tempQuan.c_str());
}
}
}
-
chimera = new Pintail(fastaFileNames[s], templatefile, filter, processors, maskfile, consfile, quanfile, window, increment, outputDir);
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
string outputFileName, accnosFileName;
- if (maskfile != "") {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + m->getSimpleName(m->getRootName(maskfile)) + ".pintail.accnos";
- }else {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "pintail.accnos";
- }
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+ if (maskfile != "") { variables["[tag]"] = m->getSimpleName(m->getRootName(maskfile)); }
+ outputFileName = getOutputFileName("chimera", variables);
+ accnosFileName = getOutputFileName("accnos", variables);
+
- if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
+ if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
if (chimera->getUnaligned()) {
m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine();
#ifdef USE_MPI
int pid, numSeqsPerProcessor;
int tag = 2001;
- vector<unsigned long int> MPIPos;
+ vector<unsigned long long> MPIPos;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
if (pid == 0) { //you are the root process
//do your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
}else{ //you are a child process
MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
//do your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } delete chimera; return 0; }
}
//close files
MPI_File_close(&outMPIAccnos);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
-
- for (int i = 0; i < (positions.size()-1); i++) {
- lines.push_back(new linePair(positions[i], positions[(i+1)]));
- }
-
+
//break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
if(processors == 1){
numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
}else{
processIDS.resize(0);
//append output files
for(int i=1;i<processors;i++){
m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
}
//append output files
for(int i=1;i<processors;i++){
m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
- remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+ m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
}
if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
+ m->mothurRemove(outputFileName);
+ m->mothurRemove(accnosFileName);
+ for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
delete chimera;
return 0;
}
#else
+ lines.push_back(new linePair(0, 1000));
numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) { outputTypes.clear(); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; }
#endif
#endif
}
delete candidateSeq;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- unsigned long int pos = inFASTA.tellg();
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ unsigned long long pos = inFASTA.tellg();
if ((pos == -1) || (pos >= filePos->end)) { break; }
#else
if (inFASTA.eof()) { break; }
#endif
//report progress
- if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
}
//report progress
- if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
out.close();
out2.close();
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
+int ChimeraPintailCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
try {
MPI_Status status;
delete candidateSeq;
//report progress
- if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; m->mothurOutJustToLog("Processing sequence: " + toString(i+1) + "\n"); }
+ if((i+1) % 100 == 0){ cout << "Processing sequence: " << (i+1) << endl; }
}
//report progress
- if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; m->mothurOutJustToLog("Processing sequence: " + toString(num) + "\n"); }
+ if(num % 100 != 0){ cout << "Processing sequence: " << num << endl; }
return 0;
int ChimeraPintailCommand::createProcesses(string outputFileName, string filename, string accnos) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 0;
int num = 0;
string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
- in.close(); remove(tempFile.c_str());
+ in.close(); m->mothurRemove(tempFile);
}
return num;