#include "chimeraccodecommand.h"
#include "ccode.h"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraCcodeCommand::setParameters(){
try {
- CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pfilter);
- CommandParameter pwindow("window", "Number", "", "0", "", "", "",false,false); parameters.push_back(pwindow);
- CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "",false,false); parameters.push_back(pnumwanted);
- CommandParameter pmask("mask", "String", "", "", "", "", "",false,false); parameters.push_back(pmask);
- CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
- CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","chimera-mapinfo-accnos",false,true,true); parameters.push_back(pfasta);
+ CommandParameter pfilter("filter", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pfilter);
+ CommandParameter pwindow("window", "Number", "", "0", "", "", "","",false,false); parameters.push_back(pwindow);
+ CommandParameter pnumwanted("numwanted", "Number", "", "20", "", "", "","",false,false); parameters.push_back(pnumwanted);
+ CommandParameter pmask("mask", "String", "", "", "", "", "","",false,false); parameters.push_back(pmask);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
try {
string helpString = "";
helpString += "The chimera.ccode command reads a fastafile and referencefile and outputs potentially chimeric sequences.\n";
- helpString += "This command was created using the algorythms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
+ helpString += "This command was created using the algorithms described in the 'Evaluating putative chimeric sequences from PCR-amplified products' paper by Juan M. Gonzalez, Johannes Zimmerman and Cesareo Saiz-Jimenez.\n";
helpString += "The chimera.ccode command parameters are fasta, reference, filter, mask, processors, window and numwanted.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required unless you have a valid current fasta file. \n";
helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.ccode command should be in the following format: \n";
helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
}
}
//**********************************************************************************************************************
+string ChimeraCcodeCommand::getOutputPattern(string type) { //maps an output type ("chimera", "accnos" or "mapinfo") to its output file name pattern
+ try {
+ string pattern = ""; //stays empty when the type is not recognized
+ //[filename] and [tag] are placeholders; NOTE(review): the '-' presumably separates the with-tag and without-tag alternatives - confirm against getOutputFileName
+ if (type == "chimera") { pattern = "[filename],[tag],ccode.chimeras-[filename],ccode.chimeras"; }
+ else if (type == "accnos") { pattern = "[filename],[tag],ccode.accnos-[filename],ccode.accnos"; }
+ else if (type == "mapinfo") { pattern = "[filename],mapinfo"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } //unknown type: log the error and set the control_pressed flag
+ //an unknown type falls through to here and returns the empty string
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraCcodeCommand", "getOutputPattern"); //report the exception with the class and method name
+ exit(1); //terminate the process after reporting
+ }
+}
+//**********************************************************************************************************************
ChimeraCcodeCommand::ChimeraCcodeCommand(){
try {
abort = true; calledHelp = true;
outputTypes["chimera"] = tempOutNames;
outputTypes["mapinfo"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+
}
catch(exception& e) {
m->errorOut(e, "ChimeraCcodeCommand", "ChimeraCcodeCommand");
ChimeraCcodeCommand::ChimeraCcodeCommand(string option) {
try {
abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
outputTypes["chimera"] = tempOutNames;
outputTypes["mapinfo"] = tempOutNames;
outputTypes["accnos"] = tempOutNames;
+
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//erase from file list
fastaFileNames.erase(fastaFileNames.begin()+i);
i--;
+ }else {
+ m->setFastaFile(fastaFileNames[i]);
}
}
}
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; }
maskfile = validParameter.validFile(parameters, "mask", false);
if (maskfile == "not found") { maskfile = ""; }
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
temp = validParameter.validFile(parameters, "window", false); if (temp == "not found") { temp = "0"; }
- convert(temp, window);
+ m->mothurConvert(temp, window);
temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "20"; }
- convert(temp, numwanted);
+ m->mothurConvert(temp, numwanted);
+
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //the reference check must come after the save handling: save=t clears the old references first, so if no reference is provided nothing is saved and we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
}
}
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it
string outputFileName, accnosFileName;
- if (maskfile != "") {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + maskfile + ".ccode.accnos";
- }else {
- outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "ccode.chimeras";
- accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "ccode.accnos";
- }
-
- string mapInfo = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "mapinfo";
-
- if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); return 0; }
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s]));
+ string mapInfo = getOutputFileName("mapinfo", variables);
+ if (maskfile != "") { variables["[tag]"] = maskfile; }
+ outputFileName = getOutputFileName("chimera", variables);
+ accnosFileName = getOutputFileName("accnos", variables);
+
+ if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); return 0; }
#ifdef USE_MPI
int pid, numSeqsPerProcessor;
int tag = 2001;
- vector<unsigned long int> MPIPos;
+ vector<unsigned long long> MPIPos;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
MPI_File_open(MPI_COMM_WORLD, outAccnosFilename, outMode, MPI_INFO_NULL, &outMPIAccnos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
if (pid == 0) { //you are the root process
string outTemp = "For full window mapping info refer to " + mapInfo + "\n";
//align your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); remove(outputFileName.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
}else{ //you are a child process
MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
//align your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, outMPIAccnos, MPIPos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); MPI_File_close(&outMPIAccnos); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
}
//close files
outHeader.close();
- vector<unsigned long int> positions = m->divideFile(fastaFileNames[s], processors);
-
- for (int i = 0; i < (positions.size()-1); i++) {
- lines.push_back(new linePair(positions[i], positions[(i+1)]));
- }
+
//break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ vector<unsigned long long> positions = m->divideFile(fastaFileNames[s], processors);
+
+ for (int i = 0; i < (positions.size()-1); i++) {
+ lines.push_back(new linePair(positions[i], positions[(i+1)]));
+ }
+
if(processors == 1){
numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
}else{
processIDS.resize(0);
//append output files
for(int i=1;i<processors;i++){
m->appendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[i]) + ".temp").c_str());
+ m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp"));
}
//append output files
for(int i=1;i<processors;i++){
m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName);
- remove((accnosFileName + toString(processIDS[i]) + ".temp").c_str());
+ m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp"));
}
if (m->control_pressed) {
- remove(outputFileName.c_str());
- remove(accnosFileName.c_str());
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
+ m->mothurRemove(outputFileName);
+ m->mothurRemove(accnosFileName);
+ for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear();
delete chimera;
return 0;
}
#else
+ lines.push_back(new linePair(0, 1000));
numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName);
- if (m->control_pressed) { remove(outputFileName.c_str()); remove(tempHeader.c_str()); remove(accnosFileName.c_str()); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
#endif
m->appendFiles(outputFileName, tempHeader);
- remove(outputFileName.c_str());
+ m->mothurRemove(outputFileName);
rename(tempHeader.c_str(), outputFileName.c_str());
#endif
}
delete candidateSeq;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- unsigned long int pos = inFASTA.tellg();
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ unsigned long long pos = inFASTA.tellg();
if ((pos == -1) || (pos >= filePos->end)) { break; }
#else
if (inFASTA.eof()) { break; }
#endif
//report progress
- if((count) % 100 == 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 == 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
}
//report progress
- if((count) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(count)); m->mothurOutEndLine(); }
+ if((count) % 100 != 0){ m->mothurOutJustToScreen("Processing sequence: " + toString(count) + "\n"); }
out.close();
out2.close();
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long int>& MPIPos){
+int ChimeraCcodeCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, MPI_File& outAccMPI, vector<unsigned long long>& MPIPos){
try {
MPI_Status status;
int ChimeraCcodeCommand::createProcesses(string outputFileName, string filename, string accnos) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 0;
int num = 0;
string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
- in.close(); remove(tempFile.c_str());
+ in.close(); m->mothurRemove(tempFile);
}
return num;