GCC_OPTIMIZATION_LEVEL = 3;
GCC_PREPROCESSOR_DEFINITIONS = (
"MOTHUR_FILES=\"\\\"../release\\\"\"",
- "VERSION=\"\\\"1.22.0\\\"\"",
- "RELEASE_DATE=\"\\\"10/12/2011\\\"\"",
+ "VERSION=\"\\\"1.23.0\\\"\"",
+ "RELEASE_DATE=\"\\\"1/9/2012\\\"\"",
);
"GCC_VERSION[arch=*]" = "";
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
#include "phylosummary.h"
#include "referencedb.h"
/**************************************************************************************************/
-Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid) :
-Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
+Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid, bool f) :
+Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) {
try {
ReferenceDB* rdb = ReferenceDB::getInstance();
threadID = tid;
+ flip = f;
string baseName = tempFile;
if (baseName == "saved") { baseName = rdb->getSavedReference(); }
if (tfile == "saved") {
m->mothurOutEndLine(); m->mothurOut("Using probabilties from " + rdb->getSavedTaxonomy() + " that are saved in memory... "); cout.flush();;
wordGenusProb = rdb->wordGenusProb;
+ WordPairDiffArr = rdb->WordPairDiffArr;
}else {
m->mothurOut("Reading template probabilities... "); cout.flush();
readProbFile(probFileTest, probFileTest2, probFileName, probFileName2);
}
//save probabilities
- if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+ if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
}else{
//create search database and names vector
//initialze probabilities
wordGenusProb.resize(numKmers);
+ WordPairDiffArr.resize(numKmers);
//cout << numKmers << '\t' << genusNodes.size() << endl;
for (int j = 0; j < wordGenusProb.size(); j++) { wordGenusProb[j].resize(genusNodes.size()); }
//cout << numKmers << '\t' << genusNodes.size() << endl;
- //ofstream out;
- //ofstream out2;
+ ofstream out;
+ ofstream out2;
#ifdef USE_MPI
int pid;
#endif
- //m->openOutputFile(probFileName, out);
+ m->openOutputFile(probFileName, out);
//output mothur version
- //out << "#" << m->getVersion() << endl;
+ out << "#" << m->getVersion() << endl;
- //out << numKmers << endl;
+ out << numKmers << endl;
- //m->openOutputFile(probFileName2, out2);
+ m->openOutputFile(probFileName2, out2);
//output mothur version
- //out2 << "#" << m->getVersion() << endl;
+ out2 << "#" << m->getVersion() << endl;
#ifdef USE_MPI
}
if (pid == 0) {
#endif
- //out << i << '\t';
+ out << i << '\t';
#ifdef USE_MPI
}
//probabilityInTemplate = (# of seqs with that word in template + 0.50) / (total number of seqs in template + 1);
float probabilityInTemplate = (seqsWithWordi.size() + 0.50) / (float) (names.size() + 1);
-
+ diffPair tempProb(log(probabilityInTemplate), 0.0);
+ WordPairDiffArr[i] = tempProb;
+
int numNotZero = 0;
for (int k = 0; k < genusNodes.size(); k++) {
//probabilityInThisTaxonomy = (# of seqs with that word in this taxonomy + probabilityInTemplate) / (total number of seqs in this taxonomy + 1);
if (pid == 0) {
#endif
- //out << k << '\t' << wordGenusProb[i][k] << '\t';
+ out << k << '\t' << wordGenusProb[i][k] << '\t' ;
#ifdef USE_MPI
}
if (pid == 0) {
#endif
- //out << endl;
- //out2 << probabilityInTemplate << '\t' << numNotZero << endl;
+ out << endl;
+ out2 << probabilityInTemplate << '\t' << numNotZero << '\t' << log(probabilityInTemplate) << endl;
#ifdef USE_MPI
}
if (pid == 0) {
#endif
- //out.close();
- //out2.close();
+ out.close();
+ out2.close();
#ifdef USE_MPI
}
phyloTree = new PhyloTree(phyloTreeTest, phyloTreeName);
//save probabilities
- if (rdb->save) { rdb->wordGenusProb = wordGenusProb; }
+ if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
}
}
-
+
+ generateWordPairDiffArr();
+
+ //save probabilities
+ if (rdb->save) { rdb->wordGenusProb = wordGenusProb; rdb->WordPairDiffArr = WordPairDiffArr; }
+
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds get probabilities. "); m->mothurOutEndLine();
}
try {
string tax = "";
Kmer kmer(kmerSize);
+ flipped = false;
//get words contained in query
//getKmerString returns a string where the index in the string is hte kmer number
//and the character at that index can be converted to be the number of times that kmer was seen
-
string queryKmerString = kmer.getKmerString(seq->getUnaligned());
-
+
vector<int> queryKmers;
for (int i = 0; i < queryKmerString.length()-1; i++) { // the -1 is to ignore any kmer with an N in it
if (queryKmerString[i] != '!') { //this kmer is in the query
}
}
- if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); return "bad seq"; }
+ //if user wants to test reverse compliment and its reversed use that instead
+ if (flip) {
+ if (isReversed(queryKmers)) {
+ flipped = true;
+ seq->reverseComplement();
+ queryKmerString = kmer.getKmerString(seq->getUnaligned());
+ queryKmers.clear();
+ for (int i = 0; i < queryKmerString.length()-1; i++) { // the -1 is to ignore any kmer with an N in it
+ if (queryKmerString[i] != '!') { //this kmer is in the query
+ queryKmers.push_back(i);
+ }
+ }
+ }
+ }
+
+ if (queryKmers.size() == 0) { m->mothurOut(seq->getName() + "is bad."); m->mothurOutEndLine(); simpleTax = "unknown;"; return "unknown;"; }
int index = getMostProbableTaxonomy(queryKmers);
int numToSelect = queryKmers.size() / 8;
tax = bootstrapResults(queryKmers, index, numToSelect);
-
+
return tax;
}
catch(exception& e) {
seqTax = phyloTree->get(seqTax.parent);
}
- if (confidenceTax == "") { confidenceTax = "unclassified;"; simpleTax = "unclassified;"; }
+ if (confidenceTax == "") { confidenceTax = "unknown;"; simpleTax = "unknown;"; }
+
return confidenceTax;
}
exit(1);
}
}
+//********************************************************************************************************************
+//if it is more probable that the reverse compliment kmers are in the template, then we assume the sequence is reversed.
+bool Bayesian::isReversed(vector<int>& queryKmers){
+ try{
+ bool reversed = false;
+ float prob = 0;
+ float reverseProb = 0;
+
+ for (int i = 0; i < queryKmers.size(); i++){
+ int kmer = queryKmers[i];
+ if (kmer >= 0){
+ prob += WordPairDiffArr[kmer].prob;
+ reverseProb += WordPairDiffArr[kmer].reverseProb;
+ }
+ }
+
+ if (reverseProb > prob){ reversed = true; }
+
+ return reversed;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "Bayesian", "isReversed");
+ exit(1);
+ }
+}
+//********************************************************************************************************************
+int Bayesian::generateWordPairDiffArr(){
+ try{
+ Kmer kmer(kmerSize);
+ for (int i = 0; i < WordPairDiffArr.size(); i++) {
+ int reversedWord = kmer.getReverseKmerNumber(i);
+ WordPairDiffArr[i].reverseProb = WordPairDiffArr[reversedWord].prob;
+ }
+
+ return 0;
+ }catch(exception& e) {
+ m->errorOut(e, "Bayesian", "generateWordPairDiffArr");
+ exit(1);
+ }
+}
/*************************************************************************************************
map<string, int> Bayesian::parseTaxMap(string newTax) {
try{
int kmer, name;
vector<int> numbers; numbers.resize(numKmers);
float prob;
- vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ WordPairDiffArr.resize(numKmers);
//read version
length = positions[1] - positions[0];
delete buf4;
istringstream iss (tempBuf,istringstream::in);
- iss >> zeroCountProb[i] >> numbers[i];
+ float probTemp;
+ iss >> zeroCountProb[i] >> numbers[i] >> probTemp;
+ WordPairDiffArr[i].prob = tempProb;
+
}
MPI_File_close(&inMPI);
int kmer, name, count; count = 0;
vector<int> num; num.resize(numKmers);
float prob;
- vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ vector<float> zeroCountProb; zeroCountProb.resize(numKmers);
+ WordPairDiffArr.resize(numKmers);
//read version
string line2 = m->getline(inNum); m->gobble(inNum);
+ float probTemp;
//cout << threadID << '\t' << line2 << '\t' << this << endl;
while (inNum) {
- inNum >> zeroCountProb[count] >> num[count];
+ inNum >> zeroCountProb[count] >> num[count] >> probTemp;
+ WordPairDiffArr[count].prob = probTemp;
count++;
m->gobble(inNum);
//cout << threadID << '\t' << count << endl;
class Bayesian : public Classify {
public:
- Bayesian(string, string, string, int, int, int, int);
+ Bayesian(string, string, string, int, int, int, int, bool);
~Bayesian();
string getTaxonomy(Sequence*);
vector<int> genusTotals;
vector<int> genusNodes; //indexes in phyloTree where genus' are located
+ vector<diffPair> WordPairDiffArr;
+
int kmerSize, numKmers, confidenceThreshold, iters;
string bootstrapResults(vector<int>, int, int);
int getMostProbableTaxonomy(vector<int>);
void readProbFile(ifstream&, ifstream&, string, string);
bool checkReleaseDate(ifstream&, ifstream&, ifstream&, ifstream&);
+ bool isReversed(vector<int>&);
+ vector<int> createWordIndexArr(Sequence*);
+ int generateWordPairDiffArr();
};
}
namesfile = validParameter.validFile(parameters, "name", true);
- if (namesfile == "not open") { abort = true; }
+ if (namesfile == "not open") { namesfile = ""; abort = true; }
else if (namesfile == "not found") { namesfile = ""; }
else { m->setNameFile(namesfile); }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+ if (namesfile == ""){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+
}
}
catch(exception& e) {
names.push_back(temp.getName());
database->addSequence(temp);
}
-// database->generateDB();
+ database->generateDB();
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
database->readKmerDB(kmerFileTest);
}
}
-// database->generateDB();
+ database->generateDB();
MPI_File_close(&inMPI);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
}
fastaFile.close();
-// database->generateDB();
+ database->generateDB();
}else if ((method == "kmer") && (!needToGenerate)) {
ifstream kmerFileTest(kmerDBName.c_str());
database->setNumSeqs(names.size());
//sanity check
- //bool okay = phyloTree->ErrorCheck(names);
+ bool okay = phyloTree->ErrorCheck(names);
- //if (!okay) { m->control_pressed = true; }
+ if (!okay) { m->control_pressed = true; }
m->mothurOut("DONE."); m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " seconds generate search database. "); m->mothurOutEndLine();
}
}
/**************************************************************************************************/
-Classify::Classify() { m = MothurOut::getInstance(); database = NULL; }
+Classify::Classify() { m = MothurOut::getInstance(); database = NULL; flipped=false; }
/**************************************************************************************************/
int Classify::readTaxonomy(string file) {
virtual ~Classify(){};
virtual string getTaxonomy(Sequence*) = 0;
virtual string getSimpleTax() { return simpleTax; }
+ virtual bool getFlipped() { return flipped; }
virtual void generateDatabaseAndNames(string, string, string, int, float, float, float, float);
virtual void setDistName(string s) {} //for knn, so if distance method is selected with knn you can create the smallest distance file in the right place.
string taxFile, templateFile, simpleTax;
vector<string> names;
int threadID;
+ bool flip, flipped;
int readTaxonomy(string);
vector<string> parseTax(string);
else if (refTaxonomy == "not open") { abort = true; }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
+ if (namefile == ""){
+ vector<string> files; files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+
}
}
catch(exception& e) {
CommandParameter pmismatch("mismatch", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pmismatch);
CommandParameter pgapopen("gapopen", "Number", "", "-2.0", "", "", "",false,false); parameters.push_back(pgapopen);
CommandParameter pgapextend("gapextend", "Number", "", "-1.0", "", "", "",false,false); parameters.push_back(pgapextend);
+ //CommandParameter pflip("flip", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pflip);
CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
CommandParameter pprobs("probs", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pprobs);
CommandParameter piters("iters", "Number", "", "100", "", "", "",false,true); parameters.push_back(piters);
helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 0.\n";
helpString += "The probs parameter shuts off the bootstrapping results for the bayesian method. The default is true, meaning you want the bootstrapping to be shown.\n";
helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the bayesian method. The default is 100.\n";
+ //helpString += "The flip parameter allows you shut off mothur's The default is T.\n";
helpString += "The classify.seqs command should be in the following format: \n";
helpString += "classify.seqs(reference=yourTemplateFile, fasta=yourFastaFile, method=yourClassificationMethod, search=yourSearchmethod, ksize=yourKmerSize, taxonomy=yourTaxonomyFile, processors=yourProcessors) \n";
helpString += "Example classify.seqs(fasta=amazon.fasta, reference=core.filtered, method=knn, search=gotoh, ksize=8, processors=2)\n";
setParameters();
vector<string> tempOutNames;
outputTypes["taxonomy"] = tempOutNames;
+ outputTypes["accnos"] = tempOutNames;
outputTypes["taxsummary"] = tempOutNames;
outputTypes["matchdist"] = tempOutNames;
}
outputTypes["taxonomy"] = tempOutNames;
outputTypes["taxsummary"] = tempOutNames;
outputTypes["matchdist"] = tempOutNames;
+ outputTypes["accnos"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; }
probs = m->isTrue(temp);
+ //temp = validParameter.validFile(parameters, "flip", false); if (temp == "not found"){ temp = "T"; }
+ //flip = m->isTrue(temp);
+ flip = true;
+
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "100"; }
m->mothurConvert(temp, iters);
-
if ((method == "bayesian") && (search != "kmer")) {
m->mothurOut("The bayesian method requires the kmer search." + search + "will be disregarded." ); m->mothurOutEndLine();
search = "kmer";
}
+
+ if (namefileNames.size() == 0){
+ vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]);
+ parser.getNameFile(files);
+ }
+
}
}
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
- if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand()); }
+ if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip); }
else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); }
else {
m->mothurOut(search + " is not a valid method option. I will run the command using bayesian.");
m->mothurOutEndLine();
- classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand());
+ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand(), flip);
}
if (m->control_pressed) { delete classify; return 0; }
if (outputDir == "") { outputDir += m->hasPath(fastaFileNames[s]); }
string newTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "taxonomy";
+ string newaccnosFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "flip.accnos";
string tempTaxonomyFile = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "taxonomy.temp";
string taxSummary = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + RippedTaxName + "tax.summary";
}
outputNames.push_back(newTaxonomyFile); outputTypes["taxonomy"].push_back(newTaxonomyFile);
+ outputNames.push_back(newaccnosFile); outputTypes["accnos"].push_back(newaccnosFile);
outputNames.push_back(taxSummary); outputTypes["taxsummary"].push_back(taxSummary);
int start = time(NULL);
MPI_File inMPI;
MPI_File outMPINewTax;
MPI_File outMPITempTax;
+ MPI_File outMPIAcc;
int outMode=MPI_MODE_CREATE|MPI_MODE_WRONLY;
int inMode=MPI_MODE_RDONLY;
char outTempTax[1024];
strcpy(outTempTax, tempTaxonomyFile.c_str());
+
+ char outAcc[1024];
+ strcpy(outAcc, newaccnosFile.c_str());
char inFileName[1024];
strcpy(inFileName, fastaFileNames[s].c_str());
MPI_File_open(MPI_COMM_WORLD, inFileName, inMode, MPI_INFO_NULL, &inMPI); //comm, filename, mode, info, filepointer
MPI_File_open(MPI_COMM_WORLD, outNewTax, outMode, MPI_INFO_NULL, &outMPINewTax);
MPI_File_open(MPI_COMM_WORLD, outTempTax, outMode, MPI_INFO_NULL, &outMPITempTax);
+ MPI_File_open(MPI_COMM_WORLD, outAcc, outMode, MPI_INFO_NULL, &outMPIAcc);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPIAcc); MPI_File_close(&outMPITempTax); delete classify; return 0; }
if (pid == 0) { //you are the root process
//align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPIAcc); MPI_File_close(&outMPITempTax); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
for (int i = 1; i < processors; i++) {
int done;
//align your part
- driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, MPIPos);
+ driverMPI(startIndex, numSeqsPerProcessor, inMPI, outMPINewTax, outMPITempTax, outMPIAcc, MPIPos);
- if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPITempTax); delete classify; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); MPI_File_close(&inMPI); MPI_File_close(&outMPINewTax); MPI_File_close(&outMPIAcc); MPI_File_close(&outMPITempTax); delete classify; return 0; }
int done = 0;
MPI_Send(&done, 1, MPI_INT, 0, tag, MPI_COMM_WORLD);
MPI_File_close(&inMPI);
MPI_File_close(&outMPINewTax);
MPI_File_close(&outMPITempTax);
+ MPI_File_close(&outMPIAcc);
MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case
#else
}
#endif
if(processors == 1){
- numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ numFastaSeqs = driver(lines[0], newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
}else{
- numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, fastaFileNames[s]);
+ numFastaSeqs = createProcesses(newTaxonomyFile, tempTaxonomyFile, newaccnosFile, fastaFileNames[s]);
}
#endif
+
+ if (!m->isBlank(newaccnosFile)) { m->mothurOutEndLine(); m->mothurOut("[WARNING]: mothur suspects some of your sequences may be reversed, please check " + newaccnosFile + " for the list of the sequences."); m->mothurOutEndLine(); }
m->mothurOutEndLine();
m->mothurOut("It took " + toString(time(NULL) - start) + " secs to classify " + toString(numFastaSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine();
start = time(NULL);
-
+
+
#ifdef USE_MPI
if (pid == 0) { //this part does not need to be paralellized
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
}
+ current = "";
+ itTypes = outputTypes.find("accnos");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
+ }
+
delete classify;
return 0;
/**************************************************************************************************/
-int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string filename) {
+int ClassifySeqsCommand::createProcesses(string taxFileName, string tempTaxFile, string accnos, string filename) {
try {
int num = 0;
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", filename);
+ num = driver(lines[process], taxFileName + toString(getpid()) + ".temp", tempTaxFile + toString(getpid()) + ".temp", accnos + toString(getpid()) + ".temp", filename);
//pass numSeqs to parent
ofstream out;
}
//parent does its part
- num = driver(lines[0], taxFileName, tempTaxFile, filename);
+ num = driver(lines[0], taxFileName, tempTaxFile, accnos, filename);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
string extension = "";
if (i != 0) { extension = toString(i) + ".temp"; processIDS.push_back(i); }
- classifyData* tempclass = new classifyData(probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i);
+ classifyData* tempclass = new classifyData((accnos + extension), probs, method, templateFileName, taxonomyFileName, (taxFileName + extension), (tempTaxFile + extension), filename, search, kmerSize, iters, numWanted, m, lines[i]->start, lines[i]->end, match, misMatch, gapOpen, gapExtend, cutoff, i, flipThreshold);
pDataArray.push_back(tempclass);
//MySeqSumThreadFunction is in header. It must be global or static to work with the threads.
}
//parent does its part
- num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", filename);
+ num = driver(lines[processors-1], taxFileName + toString(processors-1) + ".temp", tempTaxFile + toString(processors-1) + ".temp", accnos + toString(processors-1) + ".temp", filename);
processIDS.push_back((processors-1));
//Wait until all threads have terminated.
for(int i=0;i<processIDS.size();i++){
appendTaxFiles((taxFileName + toString(processIDS[i]) + ".temp"), taxFileName);
appendTaxFiles((tempTaxFile + toString(processIDS[i]) + ".temp"), tempTaxFile);
+ appendTaxFiles((accnos + toString(processIDS[i]) + ".temp"), accnos);
m->mothurRemove((m->getFullPathName(taxFileName) + toString(processIDS[i]) + ".temp"));
m->mothurRemove((m->getFullPathName(tempTaxFile) + toString(processIDS[i]) + ".temp"));
+ m->mothurRemove((m->getFullPathName(accnos) + toString(processIDS[i]) + ".temp"));
}
return num;
//**********************************************************************************************************************
-int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string filename){
+int ClassifySeqsCommand::driver(linePair* filePos, string taxFName, string tempTFName, string accnos, string filename){
try {
ofstream outTax;
m->openOutputFile(taxFName, outTax);
ofstream outTaxSimple;
m->openOutputFile(tempTFName, outTaxSimple);
+
+ ofstream outAcc;
+ m->openOutputFile(accnos, outAcc);
ifstream inFASTA;
m->openInputFile(filename, inFASTA);
int count = 0;
while (!done) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) {
+ inFASTA.close();
+ outTax.close();
+ outTaxSimple.close();
+ outAcc.close(); return 0; }
Sequence* candidateSeq = new Sequence(inFASTA); m->gobble(inFASTA);
taxonomy = classify->getTaxonomy(candidateSeq);
if (m->control_pressed) { delete candidateSeq; return 0; }
-
- if (taxonomy != "bad seq") {
- //output confidence scores or not
- if (probs) {
- outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
- }else{
- outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
- }
-
- outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+
+ if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
+
+ //output confidence scores or not
+ if (probs) {
+ outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+ }else{
+ outTax << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
}
+
+ if (classify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+
+ outTaxSimple << candidateSeq->getName() << '\t' << classify->getSimpleTax() << endl;
+
count++;
}
delete candidateSeq;
inFASTA.close();
outTax.close();
outTaxSimple.close();
+ outAcc.close();
return count;
}
}
//**********************************************************************************************************************
#ifdef USE_MPI
-int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, vector<unsigned long long>& MPIPos){
+int ClassifySeqsCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_File& newFile, MPI_File& tempFile, MPI_File& accFile, vector<unsigned long long>& MPIPos){
try {
MPI_Status statusNew;
MPI_Status statusTemp;
+ MPI_Status statusAcc;
MPI_Status status;
int pid;
if (candidateSeq->getName() != "") {
taxonomy = classify->getTaxonomy(candidateSeq);
- if (taxonomy != "bad seq") {
- //output confidence scores or not
- if (probs) {
- outputString = candidateSeq->getName() + "\t" + taxonomy + "\n";
- }else{
- outputString = candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
- }
-
- int length = outputString.length();
- char* buf2 = new char[length];
- memcpy(buf2, outputString.c_str(), length);
+ if (taxonomy == "unknown;") { m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); m->mothurOutEndLine(); }
- MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
- delete buf2;
-
+ //output confidence scores or not
+ if (probs) {
+ outputString = candidateSeq->getName() + "\t" + taxonomy + "\n";
+ }else{
outputString = candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
- length = outputString.length();
- char* buf = new char[length];
- memcpy(buf, outputString.c_str(), length);
+ }
+
+ int length = outputString.length();
+ char* buf2 = new char[length];
+ memcpy(buf2, outputString.c_str(), length);
+
+ MPI_File_write_shared(newFile, buf2, length, MPI_CHAR, &statusNew);
+ delete buf2;
+
+ outputString = candidateSeq->getName() + "\t" + classify->getSimpleTax() + "\n";
+ length = outputString.length();
+ char* buf = new char[length];
+ memcpy(buf, outputString.c_str(), length);
- MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
- delete buf;
+ MPI_File_write_shared(tempFile, buf, length, MPI_CHAR, &statusTemp);
+ delete buf;
+
+ if (classify->getFlipped()) {
+ outputString = candidateSeq->getName() + "\n";
+ length = outputString.length();
+ char* buf3 = new char[length];
+ memcpy(buf3, outputString.c_str(), length);
+
+ MPI_File_write_shared(accFile, buf3, length, MPI_CHAR, &statusAcc);
+ delete buf3;
}
+
}
delete candidateSeq;
string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
- bool abort, probs, save;
+ bool abort, probs, save, flip;
- int driver(linePair*, string, string, string);
+ int driver(linePair*, string, string, string, string);
void appendTaxFiles(string, string);
- int createProcesses(string, string, string);
+ int createProcesses(string, string, string, string);
string addUnclassifieds(string, int);
int MPIReadNamesFile(string);
#ifdef USE_MPI
- int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
+ int driverMPI(int, int, MPI_File&, MPI_File&, MPI_File&, MPI_File&, vector<unsigned long long>&);
#endif
};
string taxFName;
string tempTFName;
string filename;
- string search, taxonomyFileName, templateFileName, method;
+ string search, taxonomyFileName, templateFileName, method, accnos;
unsigned long long start;
unsigned long long end;
MothurOut* m;
float match, misMatch, gapOpen, gapExtend;
int count, kmerSize, threadID, cutoff, iters, numWanted;
- bool probs;
+ bool probs, flip;
classifyData(){}
- classifyData(bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid) {
+ classifyData(string acc, bool p, string me, string te, string tx, string a, string r, string f, string se, int ks, int i, int numW, MothurOut* mout, unsigned long long st, unsigned long long en, float ma, float misMa, float gapO, float gapE, int cut, int tid, bool fli) {
+ accnos = acc;
taxonomyFileName = tx;
templateFileName = te;
taxFName = a;
threadID = tid;
probs = p;
count = 0;
+ flip = fli;
}
};
ofstream outTaxSimple;
pDataArray->m->openOutputFile(pDataArray->tempTFName, outTaxSimple);
+ ofstream outAcc;
+ pDataArray->m->openOutputFile(pDataArray->accnos, outAcc);
+
ifstream inFASTA;
pDataArray->m->openInputFile(pDataArray->filename, inFASTA);
//make classify
Classify* myclassify;
- if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID); }
- else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID); }
+ if(pDataArray->method == "bayesian"){ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip); }
+ else if(pDataArray->method == "knn"){ myclassify = new Knn(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->numWanted, pDataArray->threadID, pDataArray->flipThreshold); }
else {
pDataArray->m->mothurOut(pDataArray->search + " is not a valid method option. I will run the command using bayesian.");
pDataArray->m->mothurOutEndLine();
- myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID);
+ myclassify = new Bayesian(pDataArray->taxonomyFileName, pDataArray->templateFileName, pDataArray->search, pDataArray->kmerSize, pDataArray->cutoff, pDataArray->iters, pDataArray->threadID, pDataArray->flip);
}
if (pDataArray->m->control_pressed) { delete myclassify; return 0; }
if (pDataArray->m->control_pressed) { delete candidateSeq; return 0; }
- if (taxonomy != "bad seq") {
- //output confidence scores or not
- if (pDataArray->probs) {
- outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
- }else{
- outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
- }
-
- outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+ if (taxonomy == "unknown;") { pDataArray->m->mothurOut("[WARNING]: " + candidateSeq->getName() + " could not be classified. You can use the remove.lineage command with taxon=unknown; to remove such sequences."); pDataArray->m->mothurOutEndLine(); }
+
+ //output confidence scores or not
+ if (pDataArray->probs) {
+ outTax << candidateSeq->getName() << '\t' << taxonomy << endl;
+ }else{
+ outTax << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
}
+
+ outTaxSimple << candidateSeq->getName() << '\t' << myclassify->getSimpleTax() << endl;
+
+ if (myclassify->getFlipped()) { outAcc << candidateSeq->getName() << endl; }
+
count++;
}
delete candidateSeq;
// ...at some point should added some additional type checking...
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not found") { namefile = ""; }
- else if (namefile == "not open") { abort = true; }
+ else if (namefile == "not open") { namefile = ""; abort = true; }
else { readNameFile(); m->setNameFile(namefile); }
string temp;
temp = validParameter.validFile(parameters, "percent", false); if (temp == "not found"){ temp = "0"; }
m->mothurConvert(temp, percent);
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+
}
}
else { distfile = fastafile; splitmethod = "fasta"; m->setFastaFile(fastafile); }
taxFile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxFile == "not open") { abort = true; }
+ if (taxFile == "not open") { taxFile = ""; abort = true; }
else if (taxFile == "not found") { taxFile = ""; }
else { m->setTaxonomyFile(taxFile); }
}else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
-
+
+ if (namefile == ""){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
catch(exception& e) {
virtual void readKmerDB(ifstream&){};
virtual void setNumSeqs(int i) { numSeqs = i; }
virtual vector<int> getSequencesWithKmer(int){ vector<int> filler; return filler; };
+ virtual int getReversed(int) { return 0; }
virtual int getMaxKmer(){ return 1; }
protected:
}
oldNameMapFName = validParameter.validFile(parameters, "name", true);
- if (oldNameMapFName == "not open") { abort = true; }
+ if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; }
else if (oldNameMapFName == "not found"){ oldNameMapFName = ""; }
else { m->setNameFile(oldNameMapFName); }
+
+ if (oldNameMapFName == "") {
+ vector<string> files; files.push_back(inFastaName);
+ parser.getNameFile(files);
+ }
+
}
}
//**********************************************************************************************************************
vector<string> GetGroupsCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
- CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
- CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+ CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+ CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+ CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
else { m->setAccnosFile(accnosfile); }
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
else { m->setListFile(listfile); }
taxfile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxfile == "not open") { abort = true; }
+ if (taxfile == "not open") { taxfile = ""; abort = true; }
else if (taxfile == "not found") { taxfile = ""; }
else { m->setTaxonomyFile(taxfile); }
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
}
}
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
else { m->setListFile(listfile); }
taxfile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxfile == "not open") { abort = true; }
+ if (taxfile == "not open") { taxfile = ""; abort = true; }
else if (taxfile == "not found") {
taxfile = m->getTaxonomyFile();
if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
m->splitAtChar(taxons, listOfTaxons, '-');
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
+
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
}
}
if (accnosfile2 == "not found") { accnosfile2 = ""; }
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
else { m->setListFile(listfile); }
taxfile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxfile == "not open") { abort = true; }
+ if (taxfile == "not open") { taxfile = ""; abort = true; }
else if (taxfile == "not found") { taxfile = ""; }
else { m->setTaxonomyFile(taxfile); }
dups = m->isTrue(temp);
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "") && (accnosfile2 == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy, quality or listfile."); m->mothurOutEndLine(); abort = true; }
+
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
}
}
#define GROUPMAP_H
/*
* groupmap.h
- * Dotur
+ * Mothur
*
* Created by Sarah Westcott on 12/1/08.
* Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
}
return kmer;
}
+/**************************************************************************************************/
+
+int Kmer::getReverseKmerNumber(int kmerNumber){
+
+ string kmerString = getKmerBases(kmerNumber);
+
+ //get Reverse
+ string reverse = "";
+ for(int i=kmerString.length()-1;i>=0;i--){
+ if(kmerString[i] == 'A') { reverse += 'T'; }
+ else if(kmerString[i] == 'T'){ reverse += 'A'; }
+ else if(kmerString[i] == 'G'){ reverse += 'C'; }
+ else if(kmerString[i] == 'C'){ reverse += 'G'; }
+ else { reverse += 'N'; }
+ }
+
+ int reverseNumber = getKmerNumber(reverse, 0);
+
+ return reverseNumber;
+
+}
/**************************************************************************************************/
string getKmerString(string);
int getKmerNumber(string, int);
string getKmerBases(int);
+ int getReverseKmerNumber(int);
vector< map<int, int> > getKmerCounts(string sequence); //for use in chimeraCheck
private:
vector<int> KmerDB::findClosestSequences(Sequence* candidateSeq, int num){
try {
+ if (num > numSeqs) { m->mothurOut("[WARNING]: you requested " + toString(num) + " closest sequences, but the template only contains " + toString(numSeqs) + ", adjusting."); m->mothurOutEndLine(); num = numSeqs; }
+
vector<int> topMatches;
Kmer kmer(kmerSize);
searchScore = 0;
}
}
/**************************************************************************************************/
+int KmerDB::getReversed(int kmerNumber) {
+ try {
+ Kmer kmer(kmerSize);
+
+ if (kmerNumber < 0) { return 0; } //if user gives negative number
+ else if (kmerNumber > maxKmer) { return 0; } //or a kmer that is bigger than maxkmer
+ else { return kmer.getReverseKmerNumber(kmerNumber); } // kmer is in vector range
+ }
+ catch(exception& e) {
+ m->errorOut(e, "KmerDB", "getReversed");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
vector<int> KmerDB::getSequencesWithKmer(int kmer) {
try {
void readKmerDB(ifstream&);
int getCount(int); //returns number of sequences with that kmer number
vector<int> getSequencesWithKmer(int); //returns vector of sequences that contain kmer passed in
+ int getReversed(int); //returns reverse compliment kmerNumber
int getMaxKmer() { return maxKmer; }
private:
}
if (closestNames.size() == 0) {
- m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. " + seq->getName() + " will be disregarded."); m->mothurOutEndLine();
- tax = "bad seq";
+ m->mothurOut("Error: All the matches for sequence " + seq->getName() + " have been eliminated. "); m->mothurOutEndLine();
+ tax = "unknown;";
}else{
tax = findCommonTaxonomy(closestNames);
- if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". " + seq->getName() + " will be disregarded."); m->mothurOutEndLine(); tax = "bad seq"; }
+ if (tax == "") { m->mothurOut("There are no common levels for sequence " + seq->getName() + ". "); m->mothurOutEndLine(); tax = "unknown;"; }
}
simpleTax = tax;
/**************************************************************************************************/
string Knn::findCommonTaxonomy(vector<string> closest) {
try {
- vector< vector<string> > taxons; //taxon[0] = vector of taxonomy info for closest[0].
+ /*vector< vector<string> > taxons; //taxon[0] = vector of taxonomy info for closest[0].
//so if closest[0] taxonomy is Bacteria;Alphaproteobacteria;Rhizobiales;Azorhizobium_et_rel.;Methylobacterium_et_rel.;Bosea;
//taxon[0][0] = Bacteria, taxon[0][1] = Alphaproteobacteria....
if (m->control_pressed) { return "control"; }
string tax = taxonomy[closest[i]]; //we know its there since we checked in getTaxonomy
+ cout << tax << endl;
taxons[i] = parseTax(tax);
}
break;
}
+ }*/
+
+ string conTax;
+
+ //create a tree containing sequences from this bin
+ PhyloTree* p = new PhyloTree();
+
+ for (int i = 0; i < closest.size(); i++) {
+ p->addSeqToTree(closest[i], taxonomy[closest[i]]);
}
-
- return common;
+
+ //build tree
+ p->assignHeirarchyIDs(0);
+
+ TaxNode currentNode = p->get(0);
+
+ //at each level
+ while (currentNode.children.size() != 0) { //you still have more to explore
+
+ TaxNode bestChild;
+ int bestChildSize = 0;
+
+ //go through children
+ for (map<string, int>::iterator itChild = currentNode.children.begin(); itChild != currentNode.children.end(); itChild++) {
+
+ TaxNode temp = p->get(itChild->second);
+
+ //select child with largest accessions - most seqs assigned to it
+ if (temp.accessions.size() > bestChildSize) {
+ bestChild = p->get(itChild->second);
+ bestChildSize = temp.accessions.size();
+ }
+
+ }
+
+ if (bestChildSize == closest.size()) { //if yes, add it
+ conTax += bestChild.name + ";";
+ }else{ //if no, quit
+ break;
+ }
+
+ //move down a level
+ currentNode = bestChild;
+ }
+
+ delete p;
+
+ return conTax;
}
catch(exception& e) {
m->errorOut(e, "Knn", "findCommonTaxonomy");
#if you using cygwin to build Windows the following line
#CXX = x86_64-w64-mingw32-g++
#CC = x86_64-w64-mingw32-g++
+ #FORTAN_COMPILER = x86_64-w64-mingw32-gfortran
#TARGET_ARCH += -m64 -static
#if you are a linux user use the following line
int nonZeroA = 0;
int nonZeroB = 0;
int totalOtus = shared[0]->getNumBins();
- int totalGroups = shared.size();
+ //int totalGroups = shared.size();
//for each otu
for (int i = 0; i < shared[0]->getNumBins(); i++) {
//**********************************************************************************************************************
vector<string> MergeGroupsCommand::setParameters(){
try {
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pgroup);
CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pdesign);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
string MergeGroupsCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The merge.groups command reads a shared file and a design file and merges the groups in the shared file that are in the same grouping in the design file.\n";
+ helpString += "The merge.groups command reads a shared or group file and a design file and merges the groups that are in the same grouping in the design file.\n";
helpString += "The merge.groups command outputs a .shared file. \n";
- helpString += "The merge.groups command parameters are shared, groups, label and design. The design and shared parameter are required.\n";
+ helpString += "The merge.groups command parameters are shared, group, groups, label and design. The design parameter is required.\n";
helpString += "The design parameter allows you to assign your groups to sets. It is required. \n";
helpString += "The design file looks like the group file. It is a 2 column tab delimited file, where the first column is the group name and the second column is the set the group belongs to.\n";
- helpString += "The groups parameter allows you to specify which of the groups in your shared you would like included. The group names are separated by dashes.\n";
+ helpString += "The groups parameter allows you to specify which of the groups in your shared or group file you would like included. The group names are separated by dashes.\n";
helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
helpString += "The merge.groups command should be in the following format: merge.groups(design=yourDesignFile, shared=yourSharedFile).\n";
helpString += "Example merge.groups(design=temp.design, groups=A-B-C, shared=temp.shared).\n";
setParameters();
vector<string> tempOutNames;
outputTypes["shared"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "MergeGroupsCommand", "MetaStatsCommand");
//initialize outputTypes
vector<string> tempOutNames;
outputTypes["shared"] = tempOutNames;
+ outputTypes["group"] = tempOutNames;
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["shared"] = inputDir + it->second; }
}
+
+ it = parameters.find("group");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["group"] = inputDir + it->second; }
+ }
+
}
//check for required parameters
else { m->mothurOut("You have no current designfile and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
}else { m->setDesignFile(designfile); }
- //make sure the user has already run the read.otu command
sharedfile = validParameter.validFile(parameters, "shared", true);
- if (sharedfile == "not open") { abort = true; }
- else if (sharedfile == "not found") {
- //if there is a current shared file, use it
- sharedfile = m->getSharedFile();
- if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
- else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
- }else { m->setSharedFile(sharedfile); }
+ if (sharedfile == "not open") { abort = true; sharedfile = ""; }
+ else if (sharedfile == "not found") { sharedfile = ""; }
+ else { m->setSharedFile(sharedfile); }
+
+ groupfile = validParameter.validFile(parameters, "group", true);
+ if (groupfile == "not open") { abort = true; groupfile = ""; }
+ else if (groupfile == "not found") { groupfile = ""; }
+ else { m->setGroupFile(groupfile); }
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
if (groups == "not found") { groups = "all"; }
m->splitAtDash(groups, Groups);
m->setGroups(Groups);
+
+ if ((sharedfile == "") && (groupfile == "")) {
+ //give priority to group, then shared
+ groupfile = m->getGroupFile();
+ if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
+ else {
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
+ }
+ }
+ }
}
}
try {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ designMap = new GroupMap(designfile);
+ designMap->readDesignMap();
+
+ if (groupfile != "") { processGroupFile(designMap); }
+ if (sharedfile != "") { processSharedFile(designMap); }
+
+ //reset groups parameter
+ m->clearGroups();
+ delete designMap;
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
+
+
+ //set shared file as new current sharedfile
+ string current = "";
+ itTypes = outputTypes.find("shared");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
+ }
+
+ itTypes = outputTypes.find("group");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
+ }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeGroupsCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+ try {
+
+ map<string, SharedRAbundVector> merged;
+ map<string, SharedRAbundVector>::iterator it;
+
+ for (int i = 0; i < thisLookUp.size(); i++) {
+
+ if (m->control_pressed) { return 0; }
+
+ //what grouping does this group belong to
+ string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
+ if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+
+ else {
+ //do we already have a member of this grouping?
+ it = merged.find(grouping);
+
+ if (it == merged.end()) { //nope, so create it
+ merged[grouping] = *thisLookUp[i];
+ merged[grouping].setGroup(grouping);
+ }else { //yes, merge it
+
+ for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
+ int abund = (it->second).getAbundance(j);
+ abund += thisLookUp[i]->getAbundance(j);
+
+ (it->second).set(j, abund, grouping);
+ }
+ }
+ }
+ }
+
+ //print new file
+ for (it = merged.begin(); it != merged.end(); it++) {
+ out << (it->second).getLabel() << '\t' << it->first << '\t';
+ (it->second).print(out);
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MergeGroupsCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MergeGroupsCommand::processSharedFile(GroupMap*& designMap){
+ try {
- if (outputDir == "") { outputDir += m->hasPath(sharedfile); }
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" + m->getExtension(sharedfile);
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "merge" + m->getExtension(sharedfile);
outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
ofstream out;
m->openOutputFile(outputFileName, out);
- designMap = new GroupMap(designfile);
- designMap->readDesignMap();
-
InputData input(sharedfile, "sharedfile");
lookup = input.getSharedRAbundVectors();
string lastLabel = lookup[0]->getLabel();
}
out.close();
- //reset groups parameter
- m->clearGroups();
- delete designMap;
-
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;}
-
-
- //set shared file as new current sharedfile
- string current = "";
- itTypes = outputTypes.find("shared");
- if (itTypes != outputTypes.end()) {
- if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
- }
-
- m->mothurOutEndLine();
- m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
- m->mothurOutEndLine();
+
return 0;
+
}
catch(exception& e) {
- m->errorOut(e, "MergeGroupsCommand", "execute");
+ m->errorOut(e, "MergeGroupsCommand", "processSharedFile");
exit(1);
}
}
//**********************************************************************************************************************
-int MergeGroupsCommand::process(vector<SharedRAbundVector*>& thisLookUp, ofstream& out){
+int MergeGroupsCommand::processGroupFile(GroupMap*& designMap){
try {
- map<string, SharedRAbundVector> merged;
- map<string, SharedRAbundVector>::iterator it;
+ string thisOutputDir = outputDir;
+ if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
+ string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "merge" + m->getExtension(groupfile);
+ outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
- for (int i = 0; i < thisLookUp.size(); i++) {
+ ofstream out;
+ m->openOutputFile(outputFileName, out);
+
+ //read groupfile
+ GroupMap groupMap(groupfile);
+ groupMap.readMap();
+
+ //fill Groups - checks for "all" and for any typo groups
+ SharedUtil* util = new SharedUtil();
+ vector<string> nameGroups = groupMap.getNamesOfGroups();
+ util->setGroups(Groups, nameGroups);
+ delete util;
+
+ vector<string> namesOfSeqs = groupMap.getNamesSeqs();
+ bool error = false;
+
+ for (int i = 0; i < namesOfSeqs.size(); i++) {
- if (m->control_pressed) { return 0; }
+ if (m->control_pressed) { break; }
- //what grouping does this group belong to
- string grouping = designMap->getGroup(thisLookUp[i]->getGroup());
- if (grouping == "not found") { m->mothurOut("[ERROR]: " + thisLookUp[i]->getGroup() + " is not in your design file. Ignoring!"); m->mothurOutEndLine(); grouping = "NOTFOUND"; }
+ string thisGroup = groupMap.getGroup(namesOfSeqs[i]);
- else {
- //do we already have a member of this grouping?
- it = merged.find(grouping);
+ //are you in a group the user wants
+ if (m->inUsersGroups(thisGroup, Groups)) {
+ string thisGrouping = designMap->getGroup(thisGroup);
- if (it == merged.end()) { //nope, so create it
- merged[grouping] = *thisLookUp[i];
- merged[grouping].setGroup(grouping);
- }else { //yes, merge it
-
- for (int j = 0; j < thisLookUp[i]->getNumBins(); j++) {
- int abund = (it->second).getAbundance(j);
- abund += thisLookUp[i]->getAbundance(j);
-
- (it->second).set(j, abund, grouping);
- }
+ if (thisGrouping == "not found") { m->mothurOut("[ERROR]: " + namesOfSeqs[i] + " is from group " + thisGroup + " which is not in your design file, please correct."); m->mothurOutEndLine(); error = true; }
+ else {
+ out << namesOfSeqs[i] << '\t' << thisGrouping << endl;
}
}
}
- //print new file
- for (it = merged.begin(); it != merged.end(); it++) {
- out << (it->second).getLabel() << '\t' << it->first << '\t';
- (it->second).print(out);
- }
+ if (error) { m->control_pressed = true; }
+
+ out.close();
return 0;
}
catch(exception& e) {
- m->errorOut(e, "MergeGroupsCommand", "process");
+ m->errorOut(e, "MergeGroupsCommand", "processGroupFile");
exit(1);
}
}
bool abort, allLines, pickedGroups;
set<string> labels; //holds labels to be used
- string groups, label, outputDir, inputDir, designfile, sharedfile;
+ string groups, label, outputDir, inputDir, designfile, sharedfile, groupfile;
vector<string> Groups, outputNames;
int process(vector<SharedRAbundVector*>&, ofstream&);
+ int processSharedFile(GroupMap*&);
+ int processGroupFile(GroupMap*&);
};
#endif
IntNode* right;
};
+struct diffPair {
+ float prob;
+ float reverseProb;
+
+ diffPair() {
+ prob = 0; reverseProb = 0;
+ }
+ diffPair(float p, float rp) {
+ prob = p;
+ reverseProb = rp;
+ }
+};
+
/************************************************************/
struct clusterNode {
int numSeq;
int pos2 = taxon.find_last_of(')');
if (pos2 != -1) {
string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
- if (isContainingOnlyDigits(confidenceScore)) {
+ if (isNumeric1(confidenceScore)) {
taxon = taxon.substr(0, pos); //rip off confidence
}
}
}
}else{ it++; }
}
-
+
return parameters;
}
catch(exception& e) {
}
}
+/***********************************************************************/
+//pass a vector of filenames that may match the current namefile.
+//this function will look at each one, if the rootnames match, mothur will warn
+//the user that they may have neglected to provide a namefile.
+//stops when it finds a match.
+bool OptionParser::getNameFile(vector<string> files) {
+ try {
+ string namefile = m->getNameFile();
+ bool match = false;
+
+ if (namefile != "") {
+ string temp = m->getRootName(m->getSimpleName(namefile));
+ vector<string> rootName;
+ m->splitAtChar(temp, rootName, '.');
+
+ for (int i = 0; i < files.size(); i++) {
+ temp = m->getRootName(m->getSimpleName(files[i]));
+ vector<string> root;
+ m->splitAtChar(temp, root, '.');
+
+ int smallest = rootName.size();
+ if (root.size() < smallest) { smallest = root.size(); }
+
+ int numMatches = 0;
+ for(int j = 0; j < smallest; j++) {
+ if (root[j] == rootName[j]) { numMatches++; }
+ }
+
+ if (smallest > 0) {
+ if ((numMatches >= (smallest-2)) && (root[0] == rootName[0])) {
+ m->mothurOut("[WARNING]: This command can take a namefile and you did not provide one. The current namefile is " + namefile + " which seems to match " + files[i] + ".");
+ m->mothurOutEndLine();
+ match = true;
+ break;
+ }
+ }
+ }
+
+ }
+
+
+ return match;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "OptionParser", "getNameFile");
+ exit(1);
+ }
+}
+
+
/***********************************************************************/
#include "mothur.h"
#include "mothurout.h"
-
+#include "command.hpp"
/***********************************************************************/
OptionParser(string);
~OptionParser() {}
map<string, string> getParameters();
+ bool getNameFile(vector<string>);
private:
map<string, string> parameters;
MothurOut* m;
vector<string> ParseFastaQCommand::setParameters(){
try {
CommandParameter pfastq("fastq", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfastq);
+ CommandParameter pfasta("fasta", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pfasta);
+ CommandParameter pqual("qfile", "Bool", "", "T", "", "", "",false,false); parameters.push_back(pqual);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastaQFile); }
+
+ string temp;
+ temp = validParameter.validFile(parameters, "fasta", false); if(temp == "not found"){ temp = "T"; }
+ fasta = m->isTrue(temp);
+
+ temp = validParameter.validFile(parameters, "qfile", false); if(temp == "not found"){ temp = "T"; }
+ qual = m->isTrue(temp);
+
+ if ((!fasta) && (!qual)) { m->mothurOut("[ERROR]: no outputs selected. Aborting."); m->mothurOutEndLine(); abort=true; }
}
}
string fastaFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "fasta";
string qualFile = outputDir + m->getRootName(m->getSimpleName(fastaQFile)) + "qual";
ofstream outFasta, outQual;
- m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile);
- m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile);
+
+ if (fasta) { m->openOutputFile(fastaFile, outFasta); outputNames.push_back(fastaFile); outputTypes["fasta"].push_back(fastaFile); }
+ if (qual) { m->openOutputFile(qualFile, outQual); outputNames.push_back(qualFile); outputTypes["qfile"].push_back(qualFile); }
ifstream in;
m->openInputFile(fastaQFile, in);
while (!in.eof()) {
+
+ if (m->control_pressed) { break; }
//read sequence name
string name = m->getline(in); m->gobble(in);
else { name2 = name2.substr(1); }
//read quality scores
- string qual = m->getline(in); m->gobble(in);
- if (qual == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
+ string quality = m->getline(in); m->gobble(in);
+ if (quality == "") { m->mothurOut("[ERROR]: missing quality for " + name2); m->mothurOutEndLine(); m->control_pressed = true; break; }
//sanity check sequence length and number of quality scores match
if (name2 != "") { if (name != name2) { m->mothurOut("[ERROR]: names do not match. read " + name + " for fasta and " + name2 + " for quality."); m->mothurOutEndLine(); m->control_pressed = true; break; } }
- if (qual.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(qual.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
-
- //convert quality scores
- vector<int> qualScores = convertQual(qual);
+ if (quality.length() != sequence.length()) { m->mothurOut("[ERROR]: lengths do not match. read " + toString(sequence.length()) + " characters for fasta and " + toString(quality.length()) + " characters for quality scores."); m->mothurOutEndLine(); m->control_pressed = true; break; }
//print sequence info to files
- outFasta << ">" << name << endl << sequence << endl;
+ if (fasta) { outFasta << ">" << name << endl << sequence << endl; }
- outQual << ">" << name << endl;
- for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
- outQual << endl;
+ if (qual) {
+ vector<int> qualScores = convertQual(quality);
+ outQual << ">" << name << endl;
+ for (int i = 0; i < qualScores.size(); i++) { outQual << qualScores[i] << " "; }
+ outQual << endl;
+ }
}
in.close();
- outFasta.close();
- outQual.close();
+ if (fasta) { outFasta.close(); }
+ if (qual) { outQual.close(); }
if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(fastaFile); m->mothurRemove(qualFile); return 0; }
vector<string> outputNames;
string outputDir, fastaQFile;
- bool abort;
+ bool abort, fasta, qual;
vector<int> convertQual(string);
};
if (randomtree == "") {
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
- if (treefile == "not open") { abort = true; }
+ if (treefile == "not open") { treefile = ""; abort = true; }
else if (treefile == "not found") { //if there is a current design file, use it
treefile = m->getTreeFile();
if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
else { m->setGroupFile(groupfile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
}
m->setProcessors(temp);
m->mothurConvert(temp, processors);
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+
}
}
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
- if (treefile == "not open") { abort = true; }
+ if (treefile == "not open") { treefile = ""; abort = true; }
else if (treefile == "not found") {
//if there is a current design file, use it
treefile = m->getTreeFile();
else { m->setGroupFile(groupfile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
}
if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
}
}
int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
try {
+
numSeqs++;
map<string, int>::iterator childPointer;
tree[0].heirarchyID = "0";
maxLevel = 0;
calcTotals = true;
+ addSeqToTree("unknown", "unknown;");
}
catch(exception& e) {
m->errorOut(e, "PhyloTree", "PhyloTree");
maxLevel = 0;
calcTotals = true;
string name, tax;
+ addSeqToTree("unknown", "unknown;");
#ifdef USE_MPI
int PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
try {
-
numSeqs++;
map<string, int>::iterator childPointer;
map<string, int>::iterator childPointer;
vector<TaxNode> copy = tree;
-
+
//fill out tree
fillOutTree(0, copy);
void PhyloTree::print(ofstream& out, vector<TaxNode>& copy){
try {
-
+
//output mothur version
out << "#" << m->getVersion() << endl;
out << copy.size() << endl;
out << maxLevel << endl;
-
+
for (int i = 0; i < copy.size(); i++) {
-
+
out << copy[i].level << '\t'<< copy[i].name << '\t' << copy[i].children.size() << '\t';
map<string,int>::iterator it;
try {
bool okay = true;
+ templateFileNames.push_back("unknown");
map<string, int>::iterator itFind;
map<string, int> taxonomyFileNames = name2Taxonomy;
m->mothurOut("No valid current files. taxonomy is a required parameter."); m->mothurOutEndLine();
abort = true;
}
- }else if (taxonomyFileName == "not open") { abort = true; }
+ }else if (taxonomyFileName == "not open") { taxonomyFileName = ""; abort = true; }
else { m->setTaxonomyFile(taxonomyFileName); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { readNamesFile(); m->setNameFile(namefile); }
else { allLines = 1; }
}
+ if (namefile == "") {
+ vector<string> files; files.push_back(taxonomyFileName);
+ parser.getNameFile(files);
+ }
+
}
}
catch(exception& e) {
// ...at some point should added some additional type checking...
namefile = validParameter.validFile(parameters, "name", true);
if (namefile == "not found") { namefile = ""; }
- else if (namefile == "not open") { abort = true; }
+ else if (namefile == "not open") { namefile = ""; abort = true; }
else { m->setNameFile(namefile); }
groupfile = validParameter.validFile(parameters, "group", true);
m->setProcessors(temp);
m->mothurConvert(temp, processors);
-
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
setSavedReference("");
for(int i = 0; i < wordGenusProb.size(); i++) { wordGenusProb[i].clear(); }
wordGenusProb.clear();
+ WordPairDiffArr.clear();
setSavedTaxonomy("");
}
/*******************************************************
bool save;
vector<Sequence> referenceSeqs;
vector< vector<float> > wordGenusProb;
+ vector<diffPair> WordPairDiffArr;
string getSavedReference() { return referencefile; }
void setSavedReference(string p) { referencefile = p; }
//**********************************************************************************************************************
vector<string> RemoveGroupsCommand::setParameters(){
try {
- CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
- CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
- CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
- CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
- CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
+ CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
+ CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
+ CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
+ CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
+
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+
}
}
//check for required parameters
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
else { m->setListFile(listfile); }
taxfile = validParameter.validFile(parameters, "taxonomy", true);
- if (taxfile == "not open") { abort = true; }
+ if (taxfile == "not open") { taxfile = ""; abort = true; }
else if (taxfile == "not found") {
taxfile = m->getTaxonomyFile();
if (taxfile != "") { m->mothurOut("Using " + taxfile + " as input file for the taxonomy parameter."); m->mothurOutEndLine(); }
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide one of the following: fasta, name, group, alignreport, taxonomy or listfile."); m->mothurOutEndLine(); abort = true; }
if ((usedDups != "") && (namefile == "")) { m->mothurOut("You may only use dups with the name option."); m->mothurOutEndLine(); abort = true; }
-
+
+ if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
+ vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+
}
}
}else { m->setAccnosFile(accnosfile); }
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") { fastafile = ""; }
else { m->setFastaFile(fastafile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
if ((fastafile == "") && (namefile == "") && (groupfile == "") && (alignfile == "") && (listfile == "") && (taxfile == "") && (qualfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, group, taxonomy, quality, alignreport or list."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile != "") && (namefile == "")) {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
temp = validParameter.validFile(parameters, "criteria", false); if (temp == "not found"){ temp = "90"; }
m->mothurConvert(temp, criteria);
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
else if (mapfile == "not found") { mapfile = ""; m->mothurOut("You must provide an map file."); m->mothurOutEndLine(); abort = true; }
fastafile = validParameter.validFile(parameters, "fasta", true);
- if (fastafile == "not open") { abort = true; }
+ if (fastafile == "not open") { fastafile = ""; abort = true; }
else if (fastafile == "not found") {
fastafile = m->getFastaFile();
if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
outputDir = "";
outputDir += m->hasPath(fastafile); //if user entered a file with a path then preserve it
}
-
+
+ if ((namefile == "") && (fastafile != "")){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
if (queryFileName != "") { m->mothurOut("Using " + queryFileName + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
else { m->mothurOut("You have no current fasta file and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
}
- else if (queryFileName == "not open") { abort = true; }
+ else if (queryFileName == "not open") { queryFileName = ""; abort = true; }
else { m->setFastaFile(queryFileName); }
referenceFileName = validParameter.validFile(parameters, "reference", true);
substitutionMatrix.resize(6);
for(int i=0;i<6;i++){ substitutionMatrix[i].resize(6,0); }
+
+ if ((namesFileName == "") && (queryFileName != "")){
+ vector<string> files; files.push_back(queryFileName);
+ parser.getNameFile(files);
+ }
}
}
catch(exception& e) {
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
-
-
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+
}
}
catch(exception& e) {
bool Sequence::getIsAligned(){
return isAligned;
}
-
//********************************************************************************************************************
void Sequence::reverseComplement(){
if (lastChar != "\\") { tempdefault += "\\"; }
#endif
- m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
- m->setDefaultPath(tempdefault);
+ //test to make sure directory exists
+ tempdefault = m->getFullPathName(tempdefault);
+ string inTemp = tempdefault + tag + "temp";
+ ofstream in;
+ in.open(inTemp.c_str(), ios::trunc);
+ if(!in) {
+ m->mothurOut(tempdefault + " directory does not exist or is not writable."); m->mothurOutEndLine();
+ }else{
+ in.close();
+ m->mothurRemove(inTemp);
+ m->mothurOut("tempDefault=" + tempdefault); m->mothurOutEndLine();
+ m->setDefaultPath(tempdefault);
+ }
}
return 0;
try {
CommandParameter pflow("flow", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pflow);
CommandParameter pfile("file", "InputTypes", "", "", "none", "fileflow", "none",false,false); parameters.push_back(pfile);
- CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plookup);
+ CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plookup);
CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "",false,false); parameters.push_back(pcutoff);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
CommandParameter pmaxiter("maxiter", "Number", "", "1000", "", "", "",false,false); parameters.push_back(pmaxiter);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string qualityFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.qual";
+ string qualityFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.qual";
ofstream qualityFile;
m->openOutputFile(qualityFileName, qualityFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fastaFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.fasta";
+ string fastaFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.fasta";
ofstream fastaFile;
m->openOutputFile(fastaFileName, fastaFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string nameFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.names";
+ string nameFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.names";
ofstream nameFile;
m->openOutputFile(nameFileName, nameFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string fileRoot = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.'));
+ string fileRoot = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName));
string groupFileName = fileRoot + ".shhh.groups";
ofstream groupFile;
m->openOutputFile(groupFileName, groupFile);
try {
string thisOutputDir = outputDir;
if (outputDir == "") { thisOutputDir += m->hasPath(flowFileName); }
- string otuCountsFileName = thisOutputDir + flowFileName.substr(0,flowFileName.find_last_of('.')) + ".shhh.counts";
+ string otuCountsFileName = thisOutputDir + m->getRootName(m->getSimpleName(flowFileName)) + ".shhh.counts";
ofstream otuCountsFile;
m->openOutputFile(otuCountsFileName, otuCountsFile);
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
m->mothurConvert(temp, processors);
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
catch(exception& e) {
m->mothurConvert(temp, cutoff);
if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
-
}
}
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(groupfile); }
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
if ((groupfile != "") && ((fastafile != "") && (listfile != ""))) {
m->mothurOut("A new group file can only be made from the subsample of a listfile or fastafile, not both. Please correct."); m->mothurOutEndLine(); abort = true; }
+ if ((fastafile != "") && (namefile == "")) {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
}
}
//check for required parameters
qualfile = validParameter.validFile(parameters, "qfile", true);
- if (qualfile == "not open") { abort = true; }
+ if (qualfile == "not open") { qualfile = ""; abort = true; }
else if (qualfile == "not found") {
qualfile = m->getQualFile();
if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter."); m->mothurOutEndLine(); }
string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- m->mothurConvert(temp, processors);
+ m->mothurConvert(temp, processors);
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(qualfile);
+ parser.getNameFile(files);
+ }
}
}
catch(exception& e) {
outputDir = "";
outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it
}
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+
}
}
catch(exception& e) {
m->mothurOut("You didn't set any options... quiting command."); m->mothurOutEndLine();
abort = true;
}
+
+ if (nameFile == "") {
+ vector<string> files; files.push_back(fastaFile);
+ parser.getNameFile(files);
+ }
}
}
else { m->setGroupFile(groupfile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
m->splitAtDash(groups, Groups);
m->setGroups(Groups);
}
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
}
}
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
- if (treefile == "not open") { abort = true; }
+ if (treefile == "not open") { treefile = ""; abort = true; }
else if (treefile == "not found") { //if there is a current design file, use it
treefile = m->getTreeFile();
if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
else { m->setGroupFile(groupfile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
m->mothurConvert(temp, processors);
if (!random) { iters = 0; } //turn off random calcs
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
}