A713EBED12DC7C5E000092AC /* nmdscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A713EBEC12DC7C5E000092AC /* nmdscommand.cpp */; };
A71CB160130B04A2001E7287 /* anosimcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71CB15E130B04A2001E7287 /* anosimcommand.cpp */; };
A71FE12C12EDF72400963CA7 /* mergegroupscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */; };
+ A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A721765613BB9F7D0014DAAE /* referencedb.cpp */; };
A727864412E9E28C00F86ABA /* removerarecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A727864312E9E28C00F86ABA /* removerarecommand.cpp */; };
+ A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */; };
A74D3687137DAB8300332B0C /* addtargets2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3655137DAB8300332B0C /* addtargets2.cpp */; };
A74D3688137DAB8400332B0C /* alignchime.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3656137DAB8300332B0C /* alignchime.cpp */; };
A74D3689137DAB8400332B0C /* alignchimel.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3657137DAB8300332B0C /* alignchimel.cpp */; };
A71CB15F130B04A2001E7287 /* anosimcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = anosimcommand.h; sourceTree = "<group>"; };
A71FE12A12EDF72400963CA7 /* mergegroupscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = mergegroupscommand.h; sourceTree = "<group>"; };
A71FE12B12EDF72400963CA7 /* mergegroupscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mergegroupscommand.cpp; sourceTree = "<group>"; };
+ A721765513BB9F7D0014DAAE /* referencedb.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = referencedb.h; sourceTree = "<group>"; };
+ A721765613BB9F7D0014DAAE /* referencedb.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = referencedb.cpp; sourceTree = "<group>"; };
A727864212E9E28C00F86ABA /* removerarecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = removerarecommand.h; sourceTree = "<group>"; };
A727864312E9E28C00F86ABA /* removerarecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = removerarecommand.cpp; sourceTree = "<group>"; };
+ A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clearmemorycommand.h; sourceTree = "<group>"; };
+ A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearmemorycommand.cpp; sourceTree = "<group>"; };
A74D3655137DAB8300332B0C /* addtargets2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = addtargets2.cpp; sourceTree = "<group>"; };
A74D3656137DAB8300332B0C /* alignchime.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchime.cpp; sourceTree = "<group>"; };
A74D3657137DAB8300332B0C /* alignchimel.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignchimel.cpp; sourceTree = "<group>"; };
A7E9B69212D37EC400DA6239 /* classifyseqscommand.cpp */,
A7E9B69712D37EC400DA6239 /* clearcutcommand.h */,
A7E9B69612D37EC400DA6239 /* clearcutcommand.cpp */,
+ A73DDBB813C4A0D1006AAE38 /* clearmemorycommand.h */,
+ A73DDBB913C4A0D1006AAE38 /* clearmemorycommand.cpp */,
A7E9B69D12D37EC400DA6239 /* clustercommand.h */,
A7E9B69C12D37EC400DA6239 /* clustercommand.cpp */,
A7E9B69F12D37EC400DA6239 /* clusterdoturcommand.h */,
A7E9B7A012D37EC400DA6239 /* qualityscores.h */,
A7E9B7A312D37EC400DA6239 /* rabundvector.cpp */,
A7E9B7A412D37EC400DA6239 /* rabundvector.hpp */,
+ A721765513BB9F7D0014DAAE /* referencedb.h */,
+ A721765613BB9F7D0014DAAE /* referencedb.cpp */,
A7E9B7CB12D37EC400DA6239 /* reportfile.cpp */,
A7E9B7CC12D37EC400DA6239 /* reportfile.h */,
A7E9B7CF12D37EC400DA6239 /* sabundvector.cpp */,
A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */,
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */,
A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */,
+ A721765713BB9F7D0014DAAE /* referencedb.cpp in Sources */,
+ A73DDBBA13C4A0D1006AAE38 /* clearmemorycommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
- "VERSION=\"\\\"1.20.0\\\"\"",
- "RELEASE_DATE=\"\\\"6/20/2011\\\"\"",
+ "VERSION=\"\\\"1.20.2\\\"\"",
+ "RELEASE_DATE=\"\\\"7/05/2011\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
#include "nast.hpp"
#include "nastreport.hpp"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> AlignCommand::setParameters(){
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
CommandParameter pthreshold("threshold", "Number", "", "0.50", "", "", "",false,false); parameters.push_back(pthreshold);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
helpString += "The flip parameter is used to specify whether or not you want mothur to try the reverse complement if a sequence falls below the threshold. The default is false.";
helpString += "The threshold is used to specify a cutoff at which an alignment is deemed 'bad' and the reverse complement may be tried. The default threshold is 0.50, meaning 50% of the bases are removed in the alignment.";
helpString += "If the flip parameter is set to true the reverse complement of the sequence is aligned and the better alignment is reported.";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The default for the threshold parameter is 0.50, meaning at least 50% of the bases must remain or the sequence is reported as potentially reversed.";
helpString += "The align.seqs command should be in the following format:";
helpString += "align.seqs(reference=yourTemplateFile, fasta=yourCandidateFile, align=yourAlignmentMethod, search=yourSearchmethod, ksize=yourKmerSize, match=yourMatchBonus, mismatch=yourMismatchpenalty, gapopen=yourGapopenPenalty, gapextend=yourGapExtendPenalty)";
//**********************************************************************************************************************
AlignCommand::AlignCommand(string option) {
try {
- abort = false; calledHelp = false;
-
+ abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
+
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true;}
else if(option == "citation") { citation(); abort = true; calledHelp = true;}
}
}
- //check for required parameters
- templateFileName = validParameter.validFile(parameters, "reference", true);
-
- if (templateFileName == "not found") {
- m->mothurOut("reference is a required parameter for the align.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }else if (templateFileName == "not open") { abort = true; }
-
candidateFileName = validParameter.validFile(parameters, "fasta", false);
if (candidateFileName == "not found") {
//if there is a current fasta file, use it
temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "f"; }
flip = m->isTrue(temp);
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templateFileName = validParameter.validFile(parameters, "reference", true);
+ if (templateFileName == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templateFileName = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the align.seqs command.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templateFileName == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templateFileName); } }
+
temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "0.50"; }
convert(temp, threshold);
vector<string> candidateFileNames;
vector<string> outputNames;
- bool abort, flip, calledHelp;
+ bool abort, flip, calledHelp, save;
};
#include "kmerdb.hpp"
#include "suffixdb.hpp"
#include "blastdb.hpp"
+#include "referencedb.h"
/**************************************************************************************************/
longest = 0;
method = s;
bool needToGenerate = true;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
- m->mothurOutEndLine();
- m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
+ if (fastaFileName == "saved") {
+ int start = time(NULL);
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
+
+ for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+ templateSequences.push_back(rdb->referenceSeqs[i]);
+ //save longest base
+ if (rdb->referenceSeqs[i].getUnaligned().length() >= longest) { longest = (rdb->referenceSeqs[i].getUnaligned().length()+1); }
+ }
+ fastaFileName = rdb->getSavedReference();
+
+ numSeqs = templateSequences.size();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
+
+ }else {
+ int start = time(NULL);
+ m->mothurOutEndLine();
+ m->mothurOut("Reading in the " + fastaFileName + " template sequences...\t"); cout.flush();
+
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long int> positions;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+ int tag = 2001;
- #ifdef USE_MPI
- int pid, processors;
- vector<unsigned long int> positions;
+ char inFileName[1024];
+ strcpy(inFileName, fastaFileName.c_str());
- MPI_Status status;
- MPI_File inMPI;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
- int tag = 2001;
-
- char inFileName[1024];
- strcpy(inFileName, fastaFileName.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+
+ if (pid == 0) {
+ positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
+ //read next sequence
+ int length = positions[i+1] - positions[i];
+ char* buf4 = new char[length];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
- if (pid == 0) {
- positions = m->setFilePosFasta(fastaFileName, numSeqs); //fills MPIPos, returns numSeqs
+ string tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence temp(iss);
+ if (temp.getName() != "") {
+ templateSequences.push_back(temp);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+
+ //save longest base
+ if (temp.getUnaligned().length() >= longest) { longest = temp.getUnaligned().length()+1; }
+ }
}
- }else{
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- positions.resize(numSeqs+1);
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
- }
-
- //read file
- for(int i=0;i<numSeqs;i++){
- if (m->control_pressed) { templateSequences.clear(); break; }
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
- //read next sequence
- int length = positions[i+1] - positions[i];
- char* buf4 = new char[length];
+ MPI_File_close(&inMPI);
- MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
-
- string tempBuf = buf4;
- if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
- delete buf4;
+ #else
+ ifstream fastaFile;
+ m->openInputFile(fastaFileName, fastaFile);
- istringstream iss (tempBuf,istringstream::in);
-
- Sequence temp(iss);
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile); m->gobble(fastaFile);
+
+ if (m->control_pressed) { templateSequences.clear(); break; }
+
if (temp.getName() != "") {
templateSequences.push_back(temp);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+
//save longest base
- if (temp.getUnaligned().length() >= longest) { longest = temp.getUnaligned().length()+1; }
+ if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
}
}
-
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
-
- MPI_File_close(&inMPI);
+ fastaFile.close();
+ #endif
- #else
- ifstream fastaFile;
- m->openInputFile(fastaFileName, fastaFile);
-
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile); m->gobble(fastaFile);
-
- if (m->control_pressed) { templateSequences.clear(); break; }
+ numSeqs = templateSequences.size();
+ //all of this is elsewhere already!
- if (temp.getName() != "") {
- templateSequences.push_back(temp);
- //save longest base
- if (temp.getUnaligned().length() >= longest) { longest = (temp.getUnaligned().length()+1); }
- }
+ m->mothurOut("DONE.");
+ m->mothurOutEndLine(); cout.flush();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(rdb->referenceSeqs.size()) + " sequences."); m->mothurOutEndLine();
+
}
- fastaFile.close();
- #endif
-
- numSeqs = templateSequences.size();
- //all of this is elsewhere already!
- m->mothurOut("DONE.");
- m->mothurOutEndLine(); cout.flush();
//in case you delete the seqs and then ask for them
emptySequence = Sequence();
search->setNumSeqs(numSeqs);
}
+
}
catch(exception& e) {
m->errorOut(e, "AlignmentDB", "AlignmentDB");
try{
vector<int> spot = search->findClosestSequences(seq, 1);
-
+
if (spot.size() != 0) { return templateSequences[spot[0]]; }
else { return emptySequence; }
#include "bayesian.h"
#include "kmer.hpp"
#include "phylosummary.h"
-
+#include "referencedb.h"
/**************************************************************************************************/
Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) :
Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
try {
+ ReferenceDB* rdb = ReferenceDB::getInstance();
+
+ string baseName = tempFile;
+ if (baseName == "saved") { baseName = rdb->getSavedReference(); }
+
+ string baseTName = tfile;
+ if (baseTName == "saved") { baseTName = rdb->getSavedTaxonomy(); }
/************calculate the probablity that each word will be in a specific taxonomy*************/
- string tfileroot = tfile.substr(0,tfile.find_last_of(".")+1);
- string tempfileroot = m->getRootName(m->getSimpleName(tempFile));
+ string tfileroot = baseTName.substr(0,baseTName.find_last_of(".")+1);
+ string tempfileroot = m->getRootName(m->getSimpleName(baseName));
string phyloTreeName = tfileroot + "tree.train";
string phyloTreeSumName = tfileroot + "tree.sum";
string probFileName = tfileroot + tempfileroot + char('0'+ kmerSize) + "mer.prob";
FilesGood = checkReleaseDate(probFileTest, probFileTest2, phyloTreeTest, probFileTest3);
}
+ //if you want to save, but you dont need to calculate then just read
+ if (rdb->save && probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood) {
+ ifstream saveIn;
+ m->openInputFile(tempFile, saveIn);
+
+ while (!saveIn.eof()) {
+ Sequence temp(saveIn);
+ m->gobble(saveIn);
+
+ rdb->referenceSeqs.push_back(temp);
+ }
+ saveIn.close();
+ }
+
if(probFileTest && probFileTest2 && phyloTreeTest && probFileTest3 && FilesGood){
+ if (tempFile == "saved") { m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine(); }
+
m->mothurOut("Reading template taxonomy... "); cout.flush();
phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
genusNodes = phyloTree->getGenusNodes();
genusTotals = phyloTree->getGenusTotals();
-
- m->mothurOut("Reading template probabilities... "); cout.flush();
- readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
+ if (tfile == "saved") {
+ m->mothurOutEndLine(); m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory... "); cout.flush();;
+ wordGenusProb = rdb->wordGenusProb;
+ }else {
+ m->mothurOut("Reading template probabilities... "); cout.flush();
+ readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
+ }
+
+ //save probabilities
+ if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
}else{
//create search database and names vector
delete phyloTree;
phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
+
+ //save probabilities
+ if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
}
}
#include "ignoregaps.h"
#include "eachgapdist.h"
-
//***************************************************************************************************************
Ccode::Ccode(string filename, string temp, bool f, string mask, int win, int numW, string o) : Chimera() {
+ try {
+
fastafile = filename;
outputDir = o;
- templateFileName = temp; templateSeqs = readSeqs(temp);
+ templateFileName = temp; templateSeqs = readSeqs(temp);
setMask(mask);
filter = f;
window = win;
out2 << "Place in masked, filtered and trimmed sequence\tPlace in original alignment" << endl;
out2.close();
#endif
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Ccode", "Ccode");
+ exit(1);
+ }
}
//***************************************************************************************************************
Ccode::~Ccode() {
*/
#include "chimera.h"
+#include "referencedb.h"
//***************************************************************************************************************
//this is a vertical soft filter
//***************************************************************************************************************
vector<Sequence*> Chimera::readSeqs(string file) {
try {
-
+
vector<Sequence*> container;
int count = 0;
length = 0;
unaligned = false;
-
- m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+ ReferenceDB* rdb = ReferenceDB::getInstance();
- #ifdef USE_MPI
- int pid, processors;
- vector<unsigned long int> positions;
- int numSeqs;
- int tag = 2001;
-
- MPI_Status status;
- MPI_File inMPI;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- //char* inFileName = new char[file.length()];
- //memcpy(inFileName, file.c_str(), file.length());
+ if (file == "saved") {
- char inFileName[1024];
- strcpy(inFileName, file.c_str());
-
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- //delete inFileName;
-
- if (pid == 0) {
- positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
-
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
- }
- }else{
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- positions.resize(numSeqs+1);
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
+
+ for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+ Sequence* temp = new Sequence(rdb->referenceSeqs[i].getName(), rdb->referenceSeqs[i].getAligned());
+
+ if (count == 0) { length = temp->getAligned().length(); count++; } //gets first seqs length
+ else if (length != temp->getAligned().length()) { unaligned = true; }
+
+ if (temp->getName() != "") { container.push_back(temp); }
}
- //read file
- for(int i=0;i<numSeqs;i++){
+ templateFileName = rdb->getSavedReference();
- if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
-
- //read next sequence
- int seqlength = positions[i+1] - positions[i];
- char* buf4 = new char[seqlength];
+ }else {
+
+ m->mothurOut("Reading sequences from " + file + "..."); cout.flush();
+
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long int> positions;
+ int numSeqs;
+ int tag = 2001;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
- MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+ //char* inFileName = new char[file.length()];
+ //memcpy(inFileName, file.c_str(), file.length());
- string tempBuf = buf4;
- if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
- delete buf4;
+ char inFileName[1024];
+ strcpy(inFileName, file.c_str());
+
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ //delete inFileName;
+
+ if (pid == 0) {
+ positions = m->setFilePosFasta(file, numSeqs); //fills MPIPos, returns numSeqs
- istringstream iss (tempBuf,istringstream::in);
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+
+ if (m->control_pressed) { MPI_File_close(&inMPI); return container; }
- Sequence* current = new Sequence(iss);
- if (current->getName() != "") {
- if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { unaligned = true; }
+ //read next sequence
+ int seqlength = positions[i+1] - positions[i];
+ char* buf4 = new char[seqlength];
+
+ MPI_File_read_at(inMPI, positions[i], buf4, seqlength, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > seqlength) { tempBuf = tempBuf.substr(0, seqlength); }
+ delete buf4;
+
+ istringstream iss (tempBuf,istringstream::in);
+ Sequence* current = new Sequence(iss);
+ if (current->getName() != "") {
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ container.push_back(current);
+ if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
+ }
+ }
+
+ MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
+
+ ifstream in;
+ m->openInputFile(file, in);
+
+ //read in seqs and store in vector
+ while(!in.eof()){
+
+ if (m->control_pressed) { return container; }
+
+ Sequence* current = new Sequence(in); m->gobble(in);
+
+ if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
+ else if (length != current->getAligned().length()) { unaligned = true; }
+
+ if (current->getName() != "") {
container.push_back(current);
+ if (rdb->save) { rdb->referenceSeqs.push_back(*current); }
}
}
-
- MPI_File_close(&inMPI);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
- #else
-
- ifstream in;
- m->openInputFile(file, in);
+ in.close();
+ #endif
- //read in seqs and store in vector
- while(!in.eof()){
+ m->mothurOut("Done."); m->mothurOutEndLine();
- if (m->control_pressed) { return container; }
-
- Sequence* current = new Sequence(in); m->gobble(in);
-
- if (count == 0) { length = current->getAligned().length(); count++; } //gets first seqs length
- else if (length != current->getAligned().length()) { unaligned = true; }
-
- if (current->getName() != "") { container.push_back(current); }
+ filterString = (string(container[0]->getAligned().length(), '1'));
}
- in.close();
- #endif
-
- m->mothurOut("Done."); m->mothurOutEndLine();
-
- filterString = (string(container[0]->getAligned().length(), '1'));
return container;
}
#include "chimeraccodecommand.h"
#include "ccode.h"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraCcodeCommand::setParameters(){
try {
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
helpString += "The mask parameter allows you to specify a file containing one sequence you wish to use as a mask for the your sequences. \n";
helpString += "The window parameter allows you to specify the window size for searching for chimeras. \n";
helpString += "The numwanted parameter allows you to specify how many sequences you would each query sequence compared with.\n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.ccode command should be in the following format: \n";
helpString += "chimera.ccode(fasta=yourFastaFile, reference=yourTemplate) \n";
helpString += "Example: chimera.ccode(fasta=AD.align, reference=core_set_aligned.imputed.fasta) \n";
ChimeraCcodeCommand::ChimeraCcodeCommand(string option) {
try {
abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.ccode command."); m->mothurOutEndLine(); abort = true; }
maskfile = validParameter.validFile(parameters, "mask", false);
if (maskfile == "not found") { maskfile = ""; }
temp = validParameter.validFile(parameters, "numwanted", false); if (temp == "not found") { temp = "20"; }
convert(temp, numwanted);
+
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
}
}
int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
- bool abort, filter;
+ bool abort, filter, save;
string fastafile, templatefile, outputDir, maskfile;
int processors, window, numwanted, numSeqs, templateSeqsLength;
Chimera* chimera;
*/
#include "chimeracheckcommand.h"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraCheckCommand::setParameters(){
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
helpString += "The svg parameter allows you to specify whether or not you would like a svg file outputted for each query sequence, default is False.\n";
helpString += "The name parameter allows you to enter a file containing names of sequences you would like .svg files for.\n";
helpString += "You may enter multiple name files by separating their names with dashes. ie. fasta=abrecovery.svg.names-amzon.svg.names \n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.check command should be in the following format: \n";
helpString += "chimera.check(fasta=yourFastaFile, reference=yourTemplateFile, processors=yourProcessors, ksize=yourKmerSize) \n";
helpString += "Example: chimera.check(fasta=AD.fasta, reference=core_set_aligned,imputed.fasta, processors=4, ksize=8) \n";
//***************************************************************************************************************
ChimeraCheckCommand::ChimeraCheckCommand(string option) {
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.check command."); m->mothurOutEndLine(); abort = true; }
namefile = validParameter.validFile(parameters, "name", false);
if (namefile == "not found") { namefile = ""; }
m->setProcessors(temp);
convert(temp, processors);
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
+
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
convert(temp, ksize);
int driverMPI(int, int, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
- bool abort, svg;
+ bool abort, svg, save;
string fastafile, templatefile, namefile, outputDir;
int processors, increment, ksize, numSeqs, templateSeqsLength;
Chimera* chimera;
#include "chimerapintailcommand.h"
#include "pintail.h"
+
//**********************************************************************************************************************
vector<string> ChimeraPintailCommand::setParameters(){
try {
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
#endif
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The window parameter allows you to specify the window size for searching for chimeras, default=300. \n";
helpString += "The increment parameter allows you to specify how far you move each window while finding chimeric sequences, default=25.\n";
helpString += "The conservation parameter allows you to enter a frequency file containing the highest bases frequency at each place in the alignment.\n";
ChimeraPintailCommand::ChimeraPintailCommand(string option) {
try {
abort = false; calledHelp = false;
+ rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
temp = validParameter.validFile(parameters, "increment", false); if (temp == "not found") { temp = "25"; }
convert(temp, increment);
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templatefile = validParameter.validFile(parameters, "reference", true);
+ if (templatefile == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templatefile == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templatefile); } }
+
+
maskfile = validParameter.validFile(parameters, "mask", false);
if (maskfile == "not found") { maskfile = ""; }
else if (maskfile != "default") {
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
- templatefile = validParameter.validFile(parameters, "reference", true);
- if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.pintail command."); m->mothurOutEndLine(); abort = true; }
-
consfile = validParameter.validFile(parameters, "conservation", true);
if (consfile == "not open") { abort = true; }
else if (consfile == "not found") {
if (maskfile == "default") { m->mothurOut("I am using the default 236627 EU009184.1 Shigella dysenteriae str. FBD013."); m->mothurOutEndLine(); }
//check for quantile to save the time
+ string baseName = templatefile;
+ if (templatefile == "saved") { baseName = rdb->getSavedReference(); }
+
string tempQuan = "";
if ((!filter) && (maskfile == "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
}else if ((!filter) && (maskfile != "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
}else if ((filter) && (maskfile != "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
}else if ((filter) && (maskfile == "")) {
- tempQuan = inputDir + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+ tempQuan = inputDir + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
}
ifstream FileTest(tempQuan.c_str());
string tryPath = m->getDefaultPath();
string tempQuan = "";
if ((!filter) && (maskfile == "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.quan";
}else if ((!filter) && (maskfile != "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.masked.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.masked.quan";
}else if ((filter) && (maskfile != "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "masked.quan";
}else if ((filter) && (maskfile == "")) {
- tempQuan = tryPath + m->getRootName(m->getSimpleName(templatefile)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
+ tempQuan = tryPath + m->getRootName(m->getSimpleName(baseName)) + "pintail.filtered." + m->getSimpleName(m->getRootName(fastaFileNames[s])) + "quan";
}
ifstream FileTest2(tempQuan.c_str());
#include "mothur.h"
#include "command.hpp"
#include "chimera.h"
-
+#include "referencedb.h"
/***********************************************************/
int execute();
void help() { m->mothurOut(getHelpString()); }
private:
-
+ ReferenceDB* rdb;
+
struct linePair {
unsigned long int start;
unsigned long int end;
int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
- bool abort, filter;
+ bool abort, filter, save;
string fastafile, templatefile, consfile, quanfile, maskfile, outputDir, inputDir;
int processors, window, increment, numSeqs, templateSeqsLength;
Chimera* chimera;
}
}
//***************************************************************************************************************
+//template=self
ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map<string, int>& prior, string mode, int k, int ms, int mms, int win, float div,
int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r) : Chimera() {
try {
}
}
+ //avoids nuisance error from formatdb for making blank blast database
+ if (userTemplate.size() == 0) {
+ return userTemplate;
+ }
+
string kmerDBNameLeft;
string kmerDBNameRight;
else { thisTemplate = getTemplate(*query, thisFilteredTemplate); } //fills this template and creates the databases
if (m->control_pressed) { return 0; }
-
if (thisTemplate.size() == 0) { return 0; } //not chimeric
//moved this out of maligner - 4/29/11
#include "chimeraslayercommand.h"
#include "chimeraslayer.h"
#include "deconvolutecommand.h"
+#include "referencedb.h"
//**********************************************************************************************************************
vector<string> ChimeraSlayerCommand::setParameters(){
CommandParameter pincrement("increment", "Number", "", "5", "", "", "",false,false); parameters.push_back(pincrement);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
-
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
helpString += "This command was modeled after the chimeraSlayer written by the Broad Institute.\n";
helpString += "The chimera.slayer command parameters are fasta, name, template, processors, trim, ksize, window, match, mismatch, divergence. minsim, mincov, minbs, minsnp, parents, search, iters, increment, numwanted, and realign.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your potentially chimeric sequences, and is required, unless you have a valid current fasta file. \n";
- helpString += "The name parameter allows you to provide a name file, if you are using template=self. \n";
+ helpString += "The name parameter allows you to provide a name file, if you are using reference=self. \n";
helpString += "You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amazon.fasta \n";
helpString += "The reference parameter allows you to enter a reference file containing known non-chimeric sequences, and is required. You may also set template=self, in this case the abundant sequences will be used as potential parents. \n";
helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n";
helpString += "The minsnp parameter allows you to specify percent of SNPs to sample on each side of breakpoint for computing bootstrap support (default: 10) \n";
helpString += "The search parameter allows you to specify search method for finding the closest parent. Choices are blast, and kmer, default blast. \n";
helpString += "The realign parameter allows you to realign the query to the potential parents. Choices are true or false, default true. \n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The chimera.slayer command should be in the following format: \n";
helpString += "chimera.slayer(fasta=yourFastaFile, reference=yourTemplate, search=yourSearch) \n";
helpString += "Example: chimera.slayer(fasta=AD.align, reference=core_set_aligned.imputed.fasta, search=kmer) \n";
ChimeraSlayerCommand::ChimeraSlayerCommand(string option) {
try {
abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
+ string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ convert(temp, processors);
+
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
string path;
it = parameters.find("reference");
//user has given a template file
if(it != parameters.end()){
- if (it->second == "self") { templatefile = "self"; }
+ if (it->second == "self") {
+ templatefile = "self";
+ if (save) {
+ m->mothurOut("[WARNING]: You can't save reference=self, ignoring save.");
+ m->mothurOutEndLine();
+ save = false;
+ }
+ }
else {
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
templatefile = validParameter.validFile(parameters, "reference", true);
if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.slayer command, unless and namefile is given."); m->mothurOutEndLine(); abort = true; }
+ else if (templatefile == "not found") { //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else { if (save) { rdb->setSavedReference(templatefile); } }
+ }
+ }else if (hasName) { templatefile = "self";
+ if (save) {
+ m->mothurOut("[WARNING]: You can't save reference=self, ignoring save.");
+ m->mothurOutEndLine();
+ save = false;
}
- }else if (hasName) { templatefile = "self"; }
- else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.slayer command."); m->mothurOutEndLine(); abort = true; }
+ }
+ else {
+ if (rdb->referenceSeqs.size() != 0) {
+ templatefile = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ templatefile = ""; abort = true;
+ }
+ }
+
- string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
- m->setProcessors(temp);
- convert(temp, processors);
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found") { temp = "7"; }
convert(temp, ksize);
convert(temp, numwanted);
if ((search != "blast") && (search != "kmer")) { m->mothurOut(search + " is not a valid search."); m->mothurOutEndLine(); abort = true; }
+
+ if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
}
}
catch(exception& e) {
int ChimeraSlayerCommand::execute(){
try{
if (abort == true) { if (calledHelp) { return 0; } return 2; }
-
+
for (int s = 0; s < fastaFileNames.size(); s++) {
m->mothurOut("Checking sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long int>&);
#endif
- bool abort, realign, trim, trimera;
+ bool abort, realign, trim, trimera, save;
string fastafile, templatefile, outputDir, search, namefile;
int processors, window, iters, increment, numwanted, ksize, match, mismatch, parents, minSimilarity, minCoverage, minBS, minSNP, numSeqs, templateSeqsLength;
float divR;
#include "deconvolutecommand.h"
#include "uc.h"
#include "sequence.hpp"
+#include "referencedb.h"
//**********************************************************************************************************************
CommandParameter pmaxlen("maxlen", "Number", "", "10000", "", "", "",false,false); parameters.push_back(pmaxlen);
CommandParameter pucl("ucl", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pucl);
CommandParameter pqueryfract("queryfract", "Number", "", "0.5", "", "", "",false,false); parameters.push_back(pqueryfract);
-
+
vector<string> myArray;
for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
return myArray;
//***************************************************************************************************************
ChimeraUchimeCommand::ChimeraUchimeCommand(string option) {
try {
- abort = false; calledHelp = false;
+ abort = false; calledHelp = false;
+ ReferenceDB* rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
-
string path;
it = parameters.find("reference");
//user has given a template file
templatefile = validParameter.validFile(parameters, "reference", true);
if (templatefile == "not open") { abort = true; }
- else if (templatefile == "not found") { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command."); m->mothurOutEndLine(); abort = true; }
+ else if (templatefile == "not found") { //check for saved reference sequences
+ if (rdb->getSavedReference() != "") {
+ templatefile = rdb->getSavedReference();
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }
}
}else if (hasName) { templatefile = "self"; }
- else { templatefile = ""; m->mothurOut("reference is a required parameter for the chimera.uchime command, unless you have a namefile."); m->mothurOutEndLine(); abort = true; }
-
-
+ else {
+ if (rdb->getSavedReference() != "") {
+ templatefile = rdb->getSavedReference();
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + "."); m->mothurOutEndLine();
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ templatefile = ""; abort = true;
+ }
+ }
+
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
convert(temp, processors);
temp = validParameter.validFile(parameters, "skipgaps2", false); if (temp == "not found") { temp = "t"; }
skipgaps2 = m->isTrue(temp);
-
+
+ if (hasName && (templatefile != "self")) { m->mothurOut("You have provided a namefile and the reference parameter is not set to self. I am not sure what reference you are trying to use, aborting."); m->mothurOutEndLine(); abort=true; }
}
}
catch(exception& e) {
#include "suffixdb.hpp"
#include "blastdb.hpp"
#include "distancedb.hpp"
+#include "referencedb.h"
/**************************************************************************************************/
void Classify::generateDatabaseAndNames(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch) {
try {
- taxFile = tfile;
- readTaxonomy(taxFile);
+ ReferenceDB* rdb = ReferenceDB::getInstance();
- templateFile = tempFile;
+ if (tfile == "saved") { tfile = rdb->getSavedTaxonomy(); }
- int start = time(NULL);
+ taxFile = tfile;
+ readTaxonomy(taxFile);
int numSeqs = 0;
- m->mothurOut("Generating search database... "); cout.flush();
-#ifdef USE_MPI
- int pid, processors;
- vector<unsigned long int> positions;
- int tag = 2001;
-
- MPI_Status status;
- MPI_File inMPI;
- MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
- MPI_Comm_size(MPI_COMM_WORLD, &processors);
-
- //char* inFileName = new char[tempFile.length()];
- //memcpy(inFileName, tempFile.c_str(), tempFile.length());
+ if (tempFile == "saved") {
+ int start = time(NULL);
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
+
+ numSeqs = rdb->referenceSeqs.size();
+ templateFile = rdb->getSavedReference();
+ tempFile = rdb->getSavedReference();
+
+ bool needToGenerate = true;
+ string kmerDBName;
+ if(method == "kmer") {
+ database = new KmerDB(tempFile, kmerSize);
+
+ kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+ if(kmerFileTest){
+ bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
+ }
+ else if(method == "suffix") { database = new SuffixDB(numSeqs); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
+ else if(method == "distance") { database = new DistanceDB(); }
+ else {
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+ m->mothurOutEndLine();
+ database = new KmerDB(tempFile, 8);
+ }
+
+ if (needToGenerate) {
+ for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
+ Sequence temp(rdb->referenceSeqs[k].getName(), rdb->referenceSeqs[k].getAligned());
+ names.push_back(temp.getName());
+ database->addSequence(temp);
+ }
+ database->generateDB();
+ }else if ((method == "kmer") && (!needToGenerate)) {
+ ifstream kmerFileTest(kmerDBName.c_str());
+ database->readKmerDB(kmerFileTest);
+
+ for (int k = 0; k < rdb->referenceSeqs.size(); k++) {
+ names.push_back(rdb->referenceSeqs[k].getName());
+ }
+ }
+
+ database->setNumSeqs(numSeqs);
+
+ //sanity check
+ bool okay = phyloTree->ErrorCheck(names);
+
+ if (!okay) { m->control_pressed = true; }
- char inFileName[1024];
- strcpy(inFileName, tempFile.c_str());
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences and generate the search databases.");m->mothurOutEndLine();
+
+ }else {
+
+ templateFile = tempFile;
+
+ int start = time(NULL);
+
+ m->mothurOut("Generating search database... "); cout.flush();
+ #ifdef USE_MPI
+ int pid, processors;
+ vector<unsigned long int> positions;
+ int tag = 2001;
+
+ MPI_Status status;
+ MPI_File inMPI;
+ MPI_Comm_rank(MPI_COMM_WORLD, &pid); //find out who we are
+ MPI_Comm_size(MPI_COMM_WORLD, &processors);
+
+ //char* inFileName = new char[tempFile.length()];
+ //memcpy(inFileName, tempFile.c_str(), tempFile.length());
+
+ char inFileName[1024];
+ strcpy(inFileName, tempFile.c_str());
- MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
- //delete inFileName;
+ MPI_File_open(MPI_COMM_WORLD, inFileName, MPI_MODE_RDONLY, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
+ //delete inFileName;
- if (pid == 0) { //only one process needs to scan file
- positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
+ if (pid == 0) { //only one process needs to scan file
+ positions = m->setFilePosFasta(tempFile, numSeqs); //fills MPIPos, returns numSeqs
- //send file positions to all processes
- for(int i = 1; i < processors; i++) {
- MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
- MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ //send file positions to all processes
+ for(int i = 1; i < processors; i++) {
+ MPI_Send(&numSeqs, 1, MPI_INT, i, tag, MPI_COMM_WORLD);
+ MPI_Send(&positions[0], (numSeqs+1), MPI_LONG, i, tag, MPI_COMM_WORLD);
+ }
+ }else{
+ MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
+ positions.resize(numSeqs+1);
+ MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
}
- }else{
- MPI_Recv(&numSeqs, 1, MPI_INT, 0, tag, MPI_COMM_WORLD, &status);
- positions.resize(numSeqs+1);
- MPI_Recv(&positions[0], (numSeqs+1), MPI_LONG, 0, tag, MPI_COMM_WORLD, &status);
- }
+
+ //create database
+ if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); }
+ else if(method == "suffix") { database = new SuffixDB(numSeqs); }
+ else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
+ else if(method == "distance") { database = new DistanceDB(); }
+ else {
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
+ database = new KmerDB(tempFile, 8);
+ }
+
+ //read file
+ for(int i=0;i<numSeqs;i++){
+ //read next sequence
+ int length = positions[i+1] - positions[i];
+ char* buf4 = new char[length];
+ MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
+
+ string tempBuf = buf4;
+ if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
+ delete buf4;
+ istringstream iss (tempBuf,istringstream::in);
+
+ Sequence temp(iss);
+ if (temp.getName() != "") {
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+ names.push_back(temp.getName());
+ database->addSequence(temp);
+ }
+ }
+
+ database->generateDB();
+ MPI_File_close(&inMPI);
+ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
+ #else
- //create database
- if(method == "kmer") { database = new KmerDB(tempFile, kmerSize); }
+ //need to know number of template seqs for suffixdb
+ if (method == "suffix") {
+ ifstream inFASTA;
+ m->openInputFile(tempFile, inFASTA);
+ m->getNumSeqs(inFASTA, numSeqs);
+ inFASTA.close();
+ }
+
+ bool needToGenerate = true;
+ string kmerDBName;
+ if(method == "kmer") {
+ database = new KmerDB(tempFile, kmerSize);
+
+ kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
+ ifstream kmerFileTest(kmerDBName.c_str());
+ if(kmerFileTest){
+ bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
+ if (GoodFile) { needToGenerate = false; }
+ }
+ }
else if(method == "suffix") { database = new SuffixDB(numSeqs); }
else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
else if(method == "distance") { database = new DistanceDB(); }
else {
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); m->mothurOutEndLine();
+ m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
+ m->mothurOutEndLine();
database = new KmerDB(tempFile, 8);
}
-
- //read file
- for(int i=0;i<numSeqs;i++){
- //read next sequence
- int length = positions[i+1] - positions[i];
- char* buf4 = new char[length];
- MPI_File_read_at(inMPI, positions[i], buf4, length, MPI_CHAR, &status);
-
- string tempBuf = buf4;
- if (tempBuf.length() > length) { tempBuf = tempBuf.substr(0, length); }
- delete buf4;
- istringstream iss (tempBuf,istringstream::in);
+
+ if (needToGenerate) {
+ ifstream fastaFile;
+ m->openInputFile(tempFile, fastaFile);
- Sequence temp(iss);
- if (temp.getName() != "") {
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile);
+ m->gobble(fastaFile);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+
names.push_back(temp.getName());
+
database->addSequence(temp);
}
- }
-
- database->generateDB();
- MPI_File_close(&inMPI);
- MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
- #else
-
- //need to know number of template seqs for suffixdb
- if (method == "suffix") {
- ifstream inFASTA;
- m->openInputFile(tempFile, inFASTA);
- m->getNumSeqs(inFASTA, numSeqs);
- inFASTA.close();
- }
+ fastaFile.close();
- bool needToGenerate = true;
- string kmerDBName;
- if(method == "kmer") {
- database = new KmerDB(tempFile, kmerSize);
+ database->generateDB();
+
+ }else if ((method == "kmer") && (!needToGenerate)) {
+ ifstream kmerFileTest(kmerDBName.c_str());
+ database->readKmerDB(kmerFileTest);
- kmerDBName = tempFile.substr(0,tempFile.find_last_of(".")+1) + char('0'+ kmerSize) + "mer";
- ifstream kmerFileTest(kmerDBName.c_str());
- if(kmerFileTest){
- bool GoodFile = m->checkReleaseVersion(kmerFileTest, m->getVersion());
- if (GoodFile) { needToGenerate = false; }
+ ifstream fastaFile;
+ m->openInputFile(tempFile, fastaFile);
+
+ while (!fastaFile.eof()) {
+ Sequence temp(fastaFile);
+ m->gobble(fastaFile);
+
+ if (rdb->save) { rdb->referenceSeqs.push_back(temp); }
+ names.push_back(temp.getName());
+ }
+ fastaFile.close();
}
- }
- else if(method == "suffix") { database = new SuffixDB(numSeqs); }
- else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch); }
- else if(method == "distance") { database = new DistanceDB(); }
- else {
- m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8.");
- m->mothurOutEndLine();
- database = new KmerDB(tempFile, 8);
- }
+ #endif
- if (needToGenerate) {
- ifstream fastaFile;
- m->openInputFile(tempFile, fastaFile);
+ database->setNumSeqs(names.size());
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile);
- m->gobble(fastaFile);
+ //sanity check
+ bool okay = phyloTree->ErrorCheck(names);
- names.push_back(temp.getName());
-
- database->addSequence(temp);
- }
- fastaFile.close();
-
- database->generateDB();
+ if (!okay) { m->control_pressed = true; }
- }else if ((method == "kmer") && (!needToGenerate)) {
- ifstream kmerFileTest(kmerDBName.c_str());
- database->readKmerDB(kmerFileTest);
-
- ifstream fastaFile;
- m->openInputFile(tempFile, fastaFile);
-
- while (!fastaFile.eof()) {
- Sequence temp(fastaFile);
- m->gobble(fastaFile);
-
- names.push_back(temp.getName());
- }
- fastaFile.close();
+ m->mothurOut("DONE."); m->mothurOutEndLine();
+ m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
}
-#endif
-
- database->setNumSeqs(names.size());
-
- //sanity check
- bool okay = phyloTree->ErrorCheck(names);
-
- if (!okay) { m->control_pressed = true; }
-
- m->mothurOut("DONE."); m->mothurOutEndLine();
- m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
}
catch(exception& e) {
#include "knn.h"
+
//**********************************************************************************************************************
vector<string> ClassifySeqsCommand::setParameters(){
try {
CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "",false,true); parameters.push_back(pnumwanted);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
#ifdef USE_MPI
helpString += "When using MPI, the processors parameter is set to the number of MPI processes running. \n";
#endif
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "The match parameter allows you to specify the bonus for having the same base. The default is 1.0.\n";
helpString += "The mistmatch parameter allows you to specify the penalty for having different bases. The default is -1.0.\n";
helpString += "The gapopen parameter allows you to specify the penalty for opening a gap in an alignment. The default is -2.0.\n";
ClassifySeqsCommand::ClassifySeqsCommand(string option) {
try {
abort = false; calledHelp = false;
+ rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
}
}
- //check for required parameters
- templateFileName = validParameter.validFile(parameters, "reference", true);
- if (templateFileName == "not found") {
- m->mothurOut("reference is a required parameter for the classify.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }
- else if (templateFileName == "not open") { abort = true; }
-
-
fastaFileName = validParameter.validFile(parameters, "fasta", false);
if (fastaFileName == "not found") {
//if there is a current fasta file, use it
if (fastaFileNames.size() == 0) { m->mothurOut("no valid files."); m->mothurOutEndLine(); abort = true; }
}
-
- taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
- if (taxonomyFileName == "not found") {
- m->mothurOut("taxonomy is a required parameter for the classify.seqs command.");
- m->mothurOutEndLine();
- abort = true;
- }
- else if (taxonomyFileName == "not open") { abort = true; }
-
-
namefile = validParameter.validFile(parameters, "name", false);
if (namefile == "not found") { namefile = ""; }
temp = validParameter.validFile(parameters, "ksize", false); if (temp == "not found"){ temp = "8"; }
convert(temp, kmerSize);
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ templateFileName = validParameter.validFile(parameters, "reference", true);
+ if (templateFileName == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ templateFileName = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required for the classify.seqs command.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (templateFileName == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(templateFileName); } }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ taxonomyFileName = validParameter.validFile(parameters, "taxonomy", true);
+ if (taxonomyFileName == "not found") {
+ //check for saved reference sequences
+ if (rdb->wordGenusProb.size() != 0) {
+ taxonomyFileName = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved taxonomy information and the taxonomy parameter is a required for the classify.seqs command.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (taxonomyFileName == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedTaxonomy(taxonomyFileName); } }
+
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
convert(temp, processors);
m->mothurOut("Classifying sequences from " + fastaFileNames[s] + " ..." ); m->mothurOutEndLine();
- string RippedTaxName = m->getRootName(m->getSimpleName(taxonomyFileName));
+ string baseTName = taxonomyFileName;
+ if (taxonomyFileName == "saved") {baseTName = rdb->getSavedTaxonomy(); }
+
+ string RippedTaxName = m->getRootName(m->getSimpleName(baseTName));
RippedTaxName = m->getExtension(RippedTaxName.substr(0, RippedTaxName.length()-1));
if (RippedTaxName[0] == '.') { RippedTaxName = RippedTaxName.substr(1, RippedTaxName.length()); }
RippedTaxName += ".";
string group = "";
if (groupfile != "") { group = groupfileNames[s]; }
- PhyloSummary taxaSum(taxonomyFileName, group);
+ PhyloSummary taxaSum(baseTName, group);
if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } delete classify; return 0; }
#include "mothur.h"
#include "command.hpp"
#include "classify.h"
+#include "referencedb.h"
//KNN and Bayesian methods modeled from algorithms in
//Naı¨ve Bayesian Classifier for Rapid Assignment of rRNA Sequences
map<string, vector<string> >::iterator itNames;
Classify* classify;
+ ReferenceDB* rdb;
string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
- bool abort, probs;
+ bool abort, probs, save;
int driver(linePair*, string, string, string);
void appendTaxFiles(string, string);
--- /dev/null
+/*
+ * clearmemorycommand.cpp
+ * Mothur
+ *
+ * Created by westcott on 7/6/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "clearmemorycommand.h"
+#include "referencedb.h"
+
+//**********************************************************************************************************************
+vector<string> ClearMemoryCommand::setParameters(){
+ try {
+ vector<string> myArray;
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClearMemoryCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string ClearMemoryCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The clear.memory command removes saved reference data from memory.\n";
+ helpString += "The clear.memory command should be in the following format: clear.memory().\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClearMemoryCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+ClearMemoryCommand::ClearMemoryCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClearMemoryCommand", "ClearMemoryCommand");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+
+int ClearMemoryCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ ReferenceDB* rdb = ReferenceDB::getInstance();
+ rdb->clearMemory();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "ClearMemoryCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************/
--- /dev/null
+#ifndef CLEARMEMORYCOMMAND_H
+#define CLEARMEMORYCOMMAND_H
+
+/*
+ * clearmemorycommand.h
+ * Mothur
+ *
+ * Created by westcott on 7/6/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "command.hpp"
+
+class ClearMemoryCommand : public Command {
+public:
+ ClearMemoryCommand(string);
+ ClearMemoryCommand(){ abort = true; calledHelp = true; }
+ ~ClearMemoryCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "clear.memory"; }
+ string getCommandCategory() { return "General"; }
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Clear.memory"; }
+ string getDescription() { return "remove saved references from memory"; }
+
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+
+private:
+ bool abort;
+ vector<string> outputNames;
+};
+
+#endif
+
#include "getcommandinfocommand.h"
#include "deuniquetreecommand.h"
#include "countseqscommand.h"
+#include "clearmemorycommand.h"
/*******************************************************/
commands["get.commandinfo"] = "get.commandinfo";
commands["deunique.tree"] = "deunique.tree";
commands["count.seqs"] = "count.seqs";
+ commands["clear.memory"] = "clear.memory";
commands["pairwise.seqs"] = "MPIEnabled";
commands["pipeline.pds"] = "MPIEnabled";
commands["classify.seqs"] = "MPIEnabled";
else if(commandName == "get.commandinfo") { command = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { command = new DeuniqueTreeCommand(optionString); }
else if(commandName == "count.seqs") { command = new CountSeqsCommand(optionString); }
+ else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "get.commandinfo") { pipecommand = new GetCommandInfoCommand(optionString); }
else if(commandName == "deunique.tree") { pipecommand = new DeuniqueTreeCommand(optionString); }
else if(commandName == "count.seqs") { pipecommand = new CountSeqsCommand(optionString); }
+ else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "get.commandinfo") { shellcommand = new GetCommandInfoCommand(); }
else if(commandName == "deunique.tree") { shellcommand = new DeuniqueTreeCommand(); }
else if(commandName == "count.seqs") { shellcommand = new CountSeqsCommand(); }
+ else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
*/
#include "getsharedotucommand.h"
+#include "sharedutilities.h"
//**********************************************************************************************************************
vector<string> GetSharedOTUCommand::setParameters(){
userGroups = "unique.";
for(int i = 0; i < Groups.size(); i++) { userGroups += Groups[i] + "-"; }
userGroups = userGroups.substr(0, userGroups.length()-1);
+ }else{
+ //sanity check for group names
+ SharedUtil util;
+ util.setGroups(Groups, groupMap->namesOfGroups);
}
//put groups in map to find easier
USEREADLINE ?= yes
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
-MOTHUR_FILES="\"../release\""
-RELEASE_DATE = "\"2/7/2011\""
-VERSION = "\"1.16.0\""
+MOTHUR_FILES="\"Enter_your_default_path_here\""
+RELEASE_DATE = "\"7/5/2011\""
+VERSION = "\"1.20.0\""
# Optimize to level 3:
CXXFLAGS += -O3
#include "mothur.h"
#include "engine.hpp"
#include "mothurout.h"
+#include "referencedb.h"
/**************************************************************************************************/
CommandFactory* CommandFactory::_uniqueInstance = 0;
MothurOut* MothurOut::_uniqueInstance = 0;
-
+ReferenceDB* ReferenceDB::myInstance = 0;
/***********************************************************************/
volatile int ctrlc_pressed = 0;
void ctrlc_handler ( int sig ) {
#include "mothur.h"
-
/***********************************************/
class MothurOut {
gui = false;
printedHeaders = false;
sharedHeaderMode = "";
- };
+ }
~MothurOut();
string logFileName;
// Skip exit(), which can be very slow in DEBUG build\r
// VERY DANGEROUS practice, because it skips global destructors.\r
// But if you know the rules, you can break 'em, right?\r
- ExitProcess(0);\r
+ //ExitProcess(0);\r
#endif\r
}\r
\r
\r
void MyCmdLine(int argc, char **argv)\r
{\r
- g_Opts.clear();\r
+ g_Opts.clear(); g_Argv.clear();\r
static unsigned RecurseDepth = 0;\r
++RecurseDepth;\r
\r
--- /dev/null
+/*
+ * referencedb.cpp
+ * Mothur
+ *
+ * Created by westcott on 6/29/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+#include "referencedb.h"
+
+/******************************************************/
+ReferenceDB* ReferenceDB::getInstance() {
+ if(myInstance == NULL) {
+ myInstance = new ReferenceDB();
+ }
+ return myInstance;
+ }
+/******************************************************/
+void ReferenceDB::clearMemory() {
+ referenceSeqs.clear();
+ setSavedReference("");
+ for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
+ wordGenusProb.clear();
+ setSavedTaxonomy("");
+}
+/*******************************************************
+ReferenceDB::~ReferenceDB() { myInstance = NULL; }
+/*******************************************************/
+
--- /dev/null
+#ifndef MYREFERENCEDB_H
+#define MYREFERENCEDB_H
+
+/*
+ * referencedb.h
+ * Mothur
+ *
+ * Created by westcott on 6/29/11.
+ * Copyright 2011 Schloss Lab. All rights reserved.
+ *
+ */
+
+
+#include "mothur.h"
+#include "sequence.hpp"
+
+/***********************************************/
+
+class ReferenceDB {
+
+ public:
+
+ static ReferenceDB* getInstance();
+ void clearMemory();
+
+ bool save;
+ vector<Sequence> referenceSeqs;
+ vector< vector<float> > wordGenusProb;
+
+ string getSavedReference() { return referencefile; }
+ void setSavedReference(string p) { referencefile = p; }
+ string getSavedTaxonomy() { return taxonomyfile; }
+ void setSavedTaxonomy(string p) { taxonomyfile = p; }
+
+ private:
+
+ static ReferenceDB* myInstance;
+ ReferenceDB() { referencefile = ""; taxonomyfile = ""; save = false; }
+ ReferenceDB(const ReferenceDB&){}// Disable copy constructor
+ void operator=(const ReferenceDB&){} // Disable assignment operator
+ ~ReferenceDB(){ myInstance = 0; }
+
+ string referencefile, taxonomyfile;
+};
+/***********************************************/
+
+#endif
+
#include "refchimeratest.h"
#include "filterseqscommand.h"
+
//**********************************************************************************************************************
vector<string> SeqErrorCommand::setParameters(){
try {
CommandParameter pignorechimeras("ignorechimeras", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pignorechimeras);
CommandParameter pthreshold("threshold", "Number", "", "1.0", "", "", "",false,false); parameters.push_back(pthreshold);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
+ CommandParameter psave("save", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(psave);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
try {
string helpString = "";
helpString += "The seq.error command reads a query alignment file and a reference alignment file and creates .....\n";
+ helpString += "The fasta parameter...\n";
+ helpString += "The reference parameter...\n";
+ helpString += "The qfile parameter...\n";
+ helpString += "The report parameter...\n";
+ helpString += "The name parameter...\n";
+ helpString += "The ignorechimeras parameter...\n";
+ helpString += "The threshold parameter...\n";
+ helpString += "The processors parameter...\n";
+ helpString += "If the save parameter is set to true the reference sequences will be saved in memory, to clear them later you can use the clear.memory command. Default=f.";
helpString += "Example seq.error(...).\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/seq.error .\n";
SeqErrorCommand::SeqErrorCommand(){
try {
abort = true; calledHelp = true;
+ setParameters();
vector<string> tempOutNames;
outputTypes["error.summary"] = tempOutNames;
outputTypes["error.seq"] = tempOutNames;
try {
abort = false; calledHelp = false;
+ rdb = ReferenceDB::getInstance();
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
}
else if (queryFileName == "not open") { abort = true; }
else { m->setFastaFile(queryFileName); }
-
- referenceFileName = validParameter.validFile(parameters, "reference", true);
- if (referenceFileName == "not found") { m->mothurOut("reference is a required parameter for the seq.error command."); m->mothurOutEndLine(); abort = true; }
- else if (referenceFileName == "not open") { abort = true; }
-
//check for optional parameters
namesFileName = validParameter.validFile(parameters, "name", true);
temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found") { temp = "1.00"; }
convert(temp, threshold);
+ temp = validParameter.validFile(parameters, "save", false); if (temp == "not found"){ temp = "f"; }
+ save = m->isTrue(temp);
+ rdb->save = save;
+ if (save) { //clear out old references
+ rdb->clearMemory();
+ }
+
+ //this has to go after save so that if the user sets save=t and provides no reference we abort
+ referenceFileName = validParameter.validFile(parameters, "reference", true);
+ if (referenceFileName == "not found") {
+ //check for saved reference sequences
+ if (rdb->referenceSeqs.size() != 0) {
+ referenceFileName = "saved";
+ }else {
+ m->mothurOut("[ERROR]: You don't have any saved reference sequences and the reference parameter is a required.");
+ m->mothurOutEndLine();
+ abort = true;
+ }
+ }else if (referenceFileName == "not open") { abort = true; }
+ else { if (save) { rdb->setSavedReference(referenceFileName); } }
+
+
temp = validParameter.validFile(parameters, "ignorechimeras", false); if (temp == "not found") { temp = "T"; }
ignoreChimeras = m->isTrue(temp);
void SeqErrorCommand::getReferences(){
try {
-
- ifstream referenceFile;
- m->openInputFile(referenceFileName, referenceFile);
-
int numAmbigSeqs = 0;
int maxStartPos = 0;
int minEndPos = 100000;
- while(referenceFile){
- Sequence currentSeq(referenceFile);
- int numAmbigs = currentSeq.getAmbigBases();
- if(numAmbigs > 0){ numAmbigSeqs++; }
+ if (referenceFileName == "saved") {
+ int start = time(NULL);
+ m->mothurOutEndLine(); m->mothurOut("Using sequences from " + rdb->getSavedReference() + " that are saved in memory."); m->mothurOutEndLine();
+
+ for (int i = 0; i < rdb->referenceSeqs.size(); i++) {
+ int numAmbigs = rdb->referenceSeqs[i].getAmbigBases();
+ if(numAmbigs > 0){ numAmbigSeqs++; }
+
+ // int startPos = rdb->referenceSeqs[i].getStartPos();
+ // if(startPos > maxStartPos) { maxStartPos = startPos; }
+ //
+ // int endPos = rdb->referenceSeqs[i].getEndPos();
+ // if(endPos < minEndPos) { minEndPos = endPos; }
+
+ referenceSeqs.push_back(rdb->referenceSeqs[i]);
+ }
+ referenceFileName = rdb->getSavedReference();
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to load " + toString(rdb->referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
+
+ }else {
+ int start = time(NULL);
+
+ ifstream referenceFile;
+ m->openInputFile(referenceFileName, referenceFile);
-// int startPos = currentSeq.getStartPos();
-// if(startPos > maxStartPos) { maxStartPos = startPos; }
-//
-// int endPos = currentSeq.getEndPos();
-// if(endPos < minEndPos) { minEndPos = endPos; }
- referenceSeqs.push_back(currentSeq);
+ while(referenceFile){
+ Sequence currentSeq(referenceFile);
+ int numAmbigs = currentSeq.getAmbigBases();
+ if(numAmbigs > 0){ numAmbigSeqs++; }
+
+ // int startPos = currentSeq.getStartPos();
+ // if(startPos > maxStartPos) { maxStartPos = startPos; }
+ //
+ // int endPos = currentSeq.getEndPos();
+ // if(endPos < minEndPos) { minEndPos = endPos; }
+ referenceSeqs.push_back(currentSeq);
- m->gobble(referenceFile);
+ if (rdb->save) { rdb->referenceSeqs.push_back(currentSeq); }
+
+ m->gobble(referenceFile);
+ }
+ referenceFile.close();
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " to read " + toString(referenceSeqs.size()) + " sequences.");m->mothurOutEndLine();
}
- referenceFile.close();
+
numRefs = referenceSeqs.size();
-
for(int i=0;i<numRefs;i++){
referenceSeqs[i].padToPos(maxStartPos);
if(numAmbigSeqs != 0){
m->mothurOut("Warning: " + toString(numAmbigSeqs) + " reference sequences have ambiguous bases, these bases will be ignored\n");
- }
+ }
}
catch(exception& e) {
#include "mothur.h"
#include "command.hpp"
#include "sequence.hpp"
+#include "referencedb.h"
struct Compare {
int AA, AT, AG, AC, TA, TT, TG, TC, GA, GT, GG, GC, CA, CT, CG, CC, NA, NT, NG, NC, Ai, Ti, Gi, Ci, Ni, dA, dT, dG, dC;
private:
bool abort;
+ ReferenceDB* rdb;
struct linePair {
unsigned long int start;
string queryFileName, referenceFileName, qualFileName, reportFileName, namesFileName, outputDir;
double threshold;
- bool ignoreChimeras;
+ bool ignoreChimeras, save;
int numRefs, processors;
int maxLength, totalBases, totalMatches;
//ofstream errorSummaryFile, errorSeqFile;
if((QuerySeqCount) % 100 != 0){ m->mothurOut("Processing sequence: " + toString(QuerySeqCount) + ", " + toString(HitCount) + " chimeras found."); m->mothurOutEndLine(); }\r
}\r
\r
-\r
Log("\n");\r
Log("%s: %u/%u chimeras found (%.1f%%)\n",\r
opt_input.c_str(), HitCount, QuerySeqCount, Pct(HitCount, QuerySeqCount));\r