*/
#include "chimeracheckcommand.h"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraCheckCommand::setParameters(){
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n";
helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n";
helpString += "You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.check command should be in the following format: \n";
helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n";
helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n";
}
}
//**********************************************************************************************************************
+string ChimeraCheckCommand::getOutputFileNameTag(string type, string inputName=""){
+ try {
+ string outputFileName = "";
+ map<string, vector<string> >::iterator it;
+
+ //is this a type this command creates
+ it = outputTypes.find(type);
+ if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
+ else {
+ if (type == "chimera") { outputFileName = "chimeracheck.chimeras"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
+ }
+ return outputFileName;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ChimeraCcodeCommand", "getOutputFileNameTag");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
ChimeraCheckCommand::ChimeraCheckCommand(){
try {
abort = true; calledHelp = true;
//***************************************************************************************************************
ChimeraCheckCommand::ChimeraCheckCommand(string option) {
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
else {
vector<string> myArray = setParameters();
//erase from file list
fastaFileNames.erase(fastaFileNames.begin()+i);
i--;
+ }else {
+ m->setFastaFile(fastaFileNames[i]);
}
}
}
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; }
namefile = validParameter.validFile(parameters, "name", false);
if (namefile == "not found") { namefile = ""; }
//erase from file list
nameFileNames.erase(nameFileNames.begin()+i);
i--;
+ }else {
+ m->setNameFile(nameFileNames[i]);
}
}
}
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
- convert(temp, ksize);
+ m->mothurConvert(temp, ksize);
temp = validParameter.validFile(parameters, "svg", false); if (temp == "not found") { temp = "F"; }
svg = m->isTrue(temp);
if (nameFileNames.size() != 0) { svg = true; }
temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "10"; }
- convert(temp, increment);
+ m->mothurConvert(temp, increment);
}
}
catch(exception& e) {
if (m->control_pressed) { delete chimera; return 0; }
if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[i]); }//if user entered a file with a path then preserve it
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + "chimeracheck.chimeras";
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[i])) + getOutputFileNameTag("chimera");
outputNames.push_back(outputFileName); outputTypes["chimera"].push_back(outputFileName);
#ifdef USE_MPI
int pid, numSeqsPerProcessor;
int tag = 2001;
- vector<unsigned long int> MPIPos;
+ vector<unsigned long long> MPIPos;
MPI_Status status;
MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
MPI_File_open(MPI_COMM_WORLD, outFilename, outMode, MPI_INFO_NULL, &outMPI);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
if (pid == 0) { //you are the root process
MPIPos = m->setFilePosFasta(fastaFileNames[i], numSeqs); //fills MPIPos, returns numSeqs
//align your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
//wait on chidren
for(int j = 1; j < processors; j++) {
//align your part
driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPI, MPIPos);
- if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { MPI_File_close(&inMPI); MPI_File_close(&outMPI); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
//tell parent you are done.
char buf[5];
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
- vector<unsigned long int> positions = m->divideFile(fastaFileNames[i], processors);
-
- for (int s = 0; s < (positions.size()-1); s++) {
- lines.push_back(new linePair(positions[s], positions[(s+1)]));
- }
+
//break up file
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ vector<unsigned long long> positions = m->divideFile(fastaFileNames[i], processors);
+
+ for (int s = 0; s < (positions.size()-1); s++) {
+ lines.push_back(new linePair(positions[s], positions[(s+1)]));
+ }
+
if(processors == 1){
numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
- if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } for (int j = 0; j < lines.size(); j++) { delete lines[j]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int j = 0; j < lines.size(); j++) { delete lines[j]; } outputTypes.clear(); lines.clear(); delete chimera; return 0; }
}else{
processIDS.resize(0);
//append output files
for(int j=1;j<processors;j++){
m->appendFiles((outputFileName + toString(processIDS[j]) + ".temp"), outputFileName);
- remove((outputFileName + toString(processIDS[j]) + ".temp").c_str());
+ m->mothurRemove((outputFileName + toString(processIDS[j]) + ".temp"));
}
if (m->control_pressed) {
- for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear();
+ for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear();
for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear();
delete chimera;
return 0;
}
#else
+ lines.push_back(new linePair(0, 1000));
numSeqs = driver(lines[0], outputFileName, fastaFileNames[i]);
- if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } outputTypes.clear(); delete chimera; return 0; }
+ if (m->control_pressed) { for (int j = 0; j < lines.size(); j++) { delete lines[j]; } lines.clear(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } outputTypes.clear(); delete chimera; return 0; }
#endif
#endif
delete chimera;
}
delete candidateSeq;
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- unsigned long int pos = inFASTA.tellg();
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+ unsigned long long pos = inFASTA.tellg();
if ((pos == -1) || (pos >= filePos->end)) { break; }
#else
if (inFASTA.eof()) { break; }
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long int>& MPIPos){
+int ChimeraCheckCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& outMPI, vector<unsigned long long>& MPIPos){
try {
MPI_File outAccMPI;
MPI_Status status;
int ChimeraCheckCommand::createProcesses(string outputFileName, string filename) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 0;
int num = 0;
string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp";
m->openInputFile(tempFile, in);
if (!in.eof()) { int tempNum = 0; in >> tempNum; num += tempNum; }
- in.close(); remove(tempFile.c_str());
+ in.close(); m->mothurRemove(tempFile);
}
return num;