From f6a58db15cdc7b90a601f8bf9c9d3b69d642f85d Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Tue, 4 Feb 2014 12:49:53 -0500 Subject: [PATCH] added relabund parameter to classify.seqs and summary.tax commands --- classifyotucommand.cpp | 16 +++---- classifyseqscommand.cpp | 11 +++-- classifyseqscommand.h | 2 +- phylosummary.cpp | 98 ++++++++++++++++++++++++++++------------- phylosummary.h | 10 ++--- summarytaxcommand.cpp | 16 ++++--- summarytaxcommand.h | 2 +- 7 files changed, 101 insertions(+), 54 deletions(-) diff --git a/classifyotucommand.cpp b/classifyotucommand.cpp index 160928f..f4a551c 100644 --- a/classifyotucommand.cpp +++ b/classifyotucommand.cpp @@ -544,11 +544,11 @@ int ClassifyOtuCommand::process(ListVector* processList) { PhyloSummary* taxaSum; if (countfile != "") { - if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); } - else { taxaSum = new PhyloSummary(ct); } + if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct,false); } + else { taxaSum = new PhyloSummary(ct,false); } }else { - if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); } - else { taxaSum = new PhyloSummary(groupMap); } + if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap,false); } + else { taxaSum = new PhyloSummary(groupMap,false); } } vector outSums; @@ -574,11 +574,11 @@ int ClassifyOtuCommand::process(ListVector* processList) { PhyloSummary* taxaSumt; if (countfile != "") { - if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct); } - else { taxaSumt = new PhyloSummary(ct); } + if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, ct, false); } + else { taxaSumt = new PhyloSummary(ct, false); } }else { - if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap); } - else { taxaSumt = new PhyloSummary(groupMap); } + if (refTaxonomy != "") { taxaSumt = new PhyloSummary(refTaxonomy, groupMap,false); } + else { taxaSumt = new PhyloSummary(groupMap,false); } } taxaSums.push_back(taxaSumt); } diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index 1b56433..30bfd9e 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -34,6 +34,7 @@ vector ClassifySeqsCommand::setParameters(){ CommandParameter piters("iters", "Number", "", "100", "", "", "","",false,true); parameters.push_back(piters); CommandParameter psave("save", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psave); CommandParameter pshortcuts("shortcuts", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pshortcuts); + CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); CommandParameter pnumwanted("numwanted", "Number", "", "10", "", "", "","",false,true); parameters.push_back(pnumwanted); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -52,7 +53,7 @@ string ClassifySeqsCommand::getHelpString(){ try { string helpString = ""; helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n"; - helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n"; + helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted, relabund and probs.\n"; helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n"; helpString += "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer, blast, align and distance. The default is kmer.\n"; helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n"; @@ -72,6 +73,7 @@ string ClassifySeqsCommand::getHelpString(){ helpString += "The numwanted parameter allows you to specify the number of sequence matches you want with the knn method. The default is 10.\n"; helpString += "The cutoff parameter allows you to specify a bootstrap confidence threshold for your taxonomy. The default is 0.\n"; helpString += "The probs parameter shuts off the bootstrapping results for the wang and zap method. The default is true, meaning you want the bootstrapping to be shown.\n"; + helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F. \n"; helpString += "The iters parameter allows you to specify how many iterations to do when calculating the bootstrap confidence score for your taxonomy with the wang method. The default is 100.\n"; //helpString += "The flip parameter allows you shut off mothur's The default is T.\n"; helpString += "The classify.seqs command should be in the following format: \n"; @@ -558,6 +560,9 @@ ClassifySeqsCommand::ClassifySeqsCommand(string option) { temp = validParameter.validFile(parameters, "probs", false); if (temp == "not found"){ temp = "true"; } probs = m->isTrue(temp); + temp = validParameter.validFile(parameters, "relabund", false); if (temp == "not found"){ temp = "false"; } + relabund = m->isTrue(temp); + temp = validParameter.validFile(parameters, "shortcuts", false); if (temp == "not found"){ temp = "true"; } writeShortcuts = m->isTrue(temp); @@ -813,12 +818,12 @@ int ClassifySeqsCommand::execute(){ if (hasCount) { ct = new CountTable(); ct->readTable(countfileNames[s], true, false); - taxaSum = new PhyloSummary(taxonomyFileName, ct); + taxaSum = new PhyloSummary(taxonomyFileName, ct, relabund); taxaSum->summarize(tempTaxonomyFile); }else { if (groupfile != "") { group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); } - taxaSum = new PhyloSummary(taxonomyFileName, groupMap); + taxaSum = new PhyloSummary(taxonomyFileName, groupMap, relabund); if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; } diff --git a/classifyseqscommand.h b/classifyseqscommand.h index 55546f2..bfc42a9 100644 --- a/classifyseqscommand.h +++ b/classifyseqscommand.h @@ -77,7 +77,7 @@ private: string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile; int processors, kmerSize, numWanted, cutoff, iters; float match, misMatch, gapOpen, gapExtend; - bool abort, probs, save, flip, hasName, hasCount, writeShortcuts; + bool abort, probs, save, flip, hasName, hasCount, writeShortcuts, relabund; int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); diff --git a/phylosummary.cpp b/phylosummary.cpp index a9d501e..89099ff 100644 --- a/phylosummary.cpp +++ b/phylosummary.cpp @@ -11,12 +11,13 @@ #include "referencedb.h" /**************************************************************************************************/ -PhyloSummary::PhyloSummary(string refTfile, CountTable* c){ +PhyloSummary::PhyloSummary(string refTfile, CountTable* c, bool r){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = false; numSeqs = 0; + relabund = r; ct = c; groupmap = NULL; @@ -44,12 +45,13 @@ PhyloSummary::PhyloSummary(string refTfile, CountTable* c){ /**************************************************************************************************/ -PhyloSummary::PhyloSummary(CountTable* c){ +PhyloSummary::PhyloSummary(CountTable* c, bool r){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = true; numSeqs = 0; + relabund = r; ct = c; groupmap = NULL; @@ -63,12 +65,13 @@ PhyloSummary::PhyloSummary(CountTable* c){ } } /**************************************************************************************************/ -PhyloSummary::PhyloSummary(string refTfile, GroupMap* g){ +PhyloSummary::PhyloSummary(string refTfile, GroupMap* g, bool r){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = false; numSeqs = 0; + relabund = r; groupmap = g; ct = NULL; @@ -96,12 +99,13 @@ PhyloSummary::PhyloSummary(string refTfile, GroupMap* g){ /**************************************************************************************************/ -PhyloSummary::PhyloSummary(GroupMap* g){ +PhyloSummary::PhyloSummary(GroupMap* g, bool r){ try { m = MothurOut::getInstance(); maxLevel = 0; ignore = true; numSeqs = 0; + relabund = r; groupmap = g; ct = NULL; @@ -377,11 +381,10 @@ void PhyloSummary::assignRank(int index){ try { map::iterator it; int counter = 1; - + for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ tree[it->second].rank = tree[index].rank + '.' + toString(counter); counter++; - assignRank(it->second); } } @@ -395,7 +398,7 @@ void PhyloSummary::assignRank(int index){ void PhyloSummary::print(ofstream& out){ try { - if (ignore) { assignRank(0); } + if (ignore) { assignRank(0); } vector mGroups; //print labels out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t"; @@ -414,12 +417,10 @@ void PhyloSummary::print(ofstream& out){ } } } - out << endl; int totalChildrenInTree = 0; map::iterator itGroup; - map::iterator it; for(it=tree[0].children.begin();it!=tree[0].children.end();it++){ if (tree[it->second].total != 0) { @@ -435,15 +436,35 @@ void PhyloSummary::print(ofstream& out){ } //print root - out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t"; - - - if (groupmap != NULL) { - for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } - }else if ( ct != NULL) { - if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } } - } - out << endl; + if (relabund) { + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << (tree[0].total/(double) tree[0].total) << '\t'; + + if (groupmap != NULL) { + for (int i = 0; i < mGroups.size(); i++) { + double thisNum = tree[0].groupCount[mGroups[i]]; + thisNum /= (double) groupmap->getNumSeqs(mGroups[i]); + out << thisNum << '\t'; + } + }else if ( ct != NULL) { + if (ct->hasGroupInfo()) { + for (int i = 0; i < mGroups.size(); i++) { + double thisNum = tree[0].groupCount[mGroups[i]]; + thisNum /= (double) ct->getGroupCount(mGroups[i]); + out << thisNum << '\t'; + } + } + } + out << endl; + }else { + out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t"; + if (groupmap != NULL) { + for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } + }else if ( ct != NULL) { + if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } } + } + out << endl; + } //print rest print(0, out); @@ -525,22 +546,37 @@ void PhyloSummary::print(int i, ofstream& out){ if (tree[it2->second].total != 0) { totalChildrenInTree++; } } - out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t"; - - map::iterator itGroup; - if (groupmap != NULL) { - //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { - // out << itGroup->second << '\t'; - //} - vector mGroups = groupmap->getNamesOfGroups(); - for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } - }else if (ct != NULL) { - if (ct->hasGroupInfo()) { - vector mGroups = ct->getNamesOfGroups(); + if (relabund) { + out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << (tree[it->second].total/(double) tree[0].total) << "\t"; + }else { + out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t"; + } + + if (relabund) { + map::iterator itGroup; + if (groupmap != NULL) { + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << (tree[it->second].groupCount[mGroups[i]]/(double)groupmap->getNumSeqs(mGroups[i])) << '\t'; } + }else if (ct != NULL) { + if (ct->hasGroupInfo()) { + vector mGroups = ct->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << (tree[it->second].groupCount[mGroups[i]]/(double)ct->getGroupCount(mGroups[i])) << '\t'; } + } + } + }else { + map::iterator itGroup; + if (groupmap != NULL) { + vector mGroups = groupmap->getNamesOfGroups(); for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } + }else if (ct != NULL) { + if (ct->hasGroupInfo()) { + vector mGroups = ct->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } + } } + } - out << endl; + out << endl; } diff --git a/phylosummary.h b/phylosummary.h index bd31173..4d3464f 100644 --- a/phylosummary.h +++ b/phylosummary.h @@ -33,10 +33,10 @@ struct rawTaxNode { class PhyloSummary { public: - PhyloSummary(GroupMap*); - PhyloSummary(string, GroupMap*); - PhyloSummary(CountTable*); - PhyloSummary(string, CountTable*); + PhyloSummary(GroupMap*, bool); + PhyloSummary(string, GroupMap*, bool); + PhyloSummary(CountTable*, bool); + PhyloSummary(string, CountTable*, bool); ~PhyloSummary() {} int summarize(string); //pass it a taxonomy file and a group file and it makes the tree @@ -55,7 +55,7 @@ private: void readTreeStruct(ifstream&); GroupMap* groupmap; CountTable* ct; - bool ignore; + bool ignore, relabund; int numNodes; int numSeqs; diff --git a/summarytaxcommand.cpp b/summarytaxcommand.cpp index 097983e..9741e66 100644 --- a/summarytaxcommand.cpp +++ b/summarytaxcommand.cpp @@ -18,6 +18,8 @@ vector SummaryTaxCommand::setParameters(){ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","",false,false,true); parameters.push_back(pcount); CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup); CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preftaxonomy); + CommandParameter prelabund("relabund", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(prelabund); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -35,11 +37,12 @@ string SummaryTaxCommand::getHelpString(){ try { string helpString = ""; helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n"; - helpString += "The summary.tax command parameters are taxonomy, count, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n"; + helpString += "The summary.tax command parameters are taxonomy, count, group, name and relabund. taxonomy is required, unless you have a valid current taxonomy file.\n"; helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n"; helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n"; helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n"; helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n"; + helpString += "The relabund parameter allows you to indicate you want the summary file values to be relative abundances rather than raw abundances. Default=F. \n"; helpString += "The summary.tax command should be in the following format: \n"; helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n"; helpString += "Note: No spaces between parameter labels (i.e. taxonomy), '=' and parameters (i.e.yourTaxonomyFile).\n"; @@ -194,6 +197,9 @@ SummaryTaxCommand::SummaryTaxCommand(string option) { outputDir = ""; outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it } + + string temp = validParameter.validFile(parameters, "relabund", false); if (temp == "not found"){ temp = "false"; } + relabund = m->isTrue(temp); if (countfile == "") { if (namefile == "") { @@ -228,11 +234,11 @@ int SummaryTaxCommand::execute(){ PhyloSummary* taxaSum; if (countfile != "") { - if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); } - else { taxaSum = new PhyloSummary(ct); } + if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct, relabund); } + else { taxaSum = new PhyloSummary(ct, relabund); } }else { - if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); } - else { taxaSum = new PhyloSummary(groupMap); } + if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap, relabund); } + else { taxaSum = new PhyloSummary(groupMap, relabund); } } if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; } diff --git a/summarytaxcommand.h b/summarytaxcommand.h index 5f61d13..7716ad0 100644 --- a/summarytaxcommand.h +++ b/summarytaxcommand.h @@ -34,7 +34,7 @@ class SummaryTaxCommand : public Command { void help() { m->mothurOut(getHelpString()); } private: - bool abort; + bool abort, relabund; string taxfile, outputDir, namefile, groupfile, refTaxonomy, countfile; vector outputNames; map nameMap; -- 2.39.2