GCC_MODEL_TUNING = "";
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
- "VERSION=\"\\\"1.26.0\\\"\"",
- "RELEASE_DATE=\"\\\"7/9/2012\\\"\"",
+ "VERSION=\"\\\"1.27.0\\\"\"",
+ "RELEASE_DATE=\"\\\"8/8/2012\\\"\"",
);
GCC_WARN_ABOUT_MISSING_NEWLINE = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
if (candidateSeq->getUnaligned().length() > alignment->getnRows()) {
alignment->resize(candidateSeq->getUnaligned().length()+1);
}
-
Sequence temp = templateDB->findClosestSequence(candidateSeq);
Sequence* templateSeq = &temp;
try {
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string BinSeqCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The bin.seqs command parameters are list, fasta, name, label and group. The fasta and list are required, unless you have a valid current list and fasta file.\n";
+ helpString += "The bin.seqs command parameters are list, fasta, name, count, label and group. The fasta and list are required, unless you have a valid current list and fasta file.\n";
helpString += "The label parameter allows you to select what distance levels you would like a output files created for, and are separated by dashes.\n";
helpString += "The bin.seqs command should be in the following format: bin.seqs(fasta=yourFastaFile, name=yourNamesFile, group=yourGroupFile, label=yourLabels).\n";
helpString += "Example bin.seqs(fasta=amazon.fasta, group=amazon.groups, name=amazon.names).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (groupfile == "not open") { abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namesfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
- if (namesfile == ""){
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
+ if (countfile == "") {
+ if (namesfile == ""){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
fasta->readFastaFile(fastafile);
//if user gave a namesfile then use it
- if (namesfile != "") {
- readNamesFile();
- }
+ if (namesfile != "") { readNamesFile(); }
+ if (countfile != "") { ct.readTable(countfile); }
input = new InputData(listfile, "list");
list = input->getListVector();
//return 1 if error, 0 otherwise
int BinSeqCommand::process(ListVector* list) {
try {
- string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + getOutputFileNameTag("fasta");
+ string outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + list->getLabel() + "." + getOutputFileNameTag("fasta");
m->openOutputFile(outputFileName, out);
outputNames.push_back(outputFileName); outputTypes["fasta"].push_back(outputFileName);
//do work for that name
string sequence = fasta->getSequence(name);
- if (sequence != "not found") {
- //if you don't have groups
- if (groupfile == "") {
- name = name + "\t" + toString(i+1);
- out << ">" << name << endl;
- out << sequence << endl;
- }else {//if you do have groups
- string group = groupMap->getGroup(name);
- if (group == "not found") {
- m->mothurOut(name + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
- return 1;
- }else{
- name = name + "\t" + group + "\t" + toString(i+1);
+
+ if (countfile != "") {
+ if (sequence != "not found") {
+ if (ct.hasGroupInfo()) {
+ vector<string> groups = ct.getGroups(name);
+ string groupInfo = "";
+ for (int k = 0; k < groups.size()-1; k++) {
+ groupInfo += groups[k] + "-";
+ }
+ if (groups.size() != 0) { groupInfo += groups[groups.size()-1]; }
+ else { groupInfo = "not found"; }
+ name = name + "\t" + groupInfo + "\t" + toString(i+1)+ "\tNumRep=" + toString(ct.getNumSeqs(name));
+ out << ">" << name << endl;
+ out << sequence << endl;
+ }else {
+ name = name + "\t" + toString(i+1) + "\tNumRep=" + toString(ct.getNumSeqs(name));
+ out << ">" << name << endl;
+ out << sequence << endl;
+ }
+
+ }else { m->mothurOut(name + " is missing from your fasta. Does your list file contain all sequence names or just the uniques?"); m->mothurOutEndLine(); return 1; }
+ }else {
+ if (sequence != "not found") {
+ //if you don't have groups
+ if (groupfile == "") {
+ name = name + "\t" + toString(i+1);
out << ">" << name << endl;
out << sequence << endl;
+ }else {//if you do have groups
+ string group = groupMap->getGroup(name);
+ if (group == "not found") {
+ m->mothurOut(name + " is missing from your group file. Please correct. "); m->mothurOutEndLine();
+ return 1;
+ }else{
+ name = name + "\t" + group + "\t" + toString(i+1);
+ out << ">" << name << endl;
+ out << sequence << endl;
+ }
}
- }
- }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+ }else { m->mothurOut(name + " is missing from your fasta or name file. Please correct. "); m->mothurOutEndLine(); return 1; }
+ }
}
}
#include "listvector.hpp"
#include "fastamap.h"
#include "groupmap.h"
+#include "counttable.h"
class BinSeqCommand : public Command {
void help() { m->mothurOut(getHelpString()); }
private:
-
+ CountTable ct;
ListVector* list;
InputData* input;
FastaMap* fasta;
GroupMap* groupMap;
bool abort, allLines;
set<string> labels; //holds labels to be used
- string filename, fastafile, listfile, namesfile, groupfile, label, outputDir;
+ string filename, fastafile, listfile, namesfile, groupfile, countfile, label, outputDir;
ofstream out;
ifstream in, inNames;
vector<string> outputNames;
bool ignore = false;
if (countfileNames[i] == "current") {
countfileNames[i] = m->getCountTableFile();
- if (nameFileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+ if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
else {
m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
//erase from file list
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pbasis("basis", "Multiple", "otu-sequence", "otu", "", "", "",false,false); parameters.push_back(pbasis);
CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
string ClassifyOtuCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
+ helpString += "The classify.otu command parameters are list, taxonomy, reftaxonomy, name, group, count, cutoff, label, basis and probs. The taxonomy and list parameters are required unless you have a valid current file.\n";
helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. Providing it will keep the rankIDs in the summary file static.\n";
helpString += "The name parameter allows you add a names file with your taxonomy file.\n";
helpString += "The group parameter allows you provide a group file to use in creating the summary file breakdown.\n";
- helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
+ helpString += "The count parameter allows you add a count file associated with your list file. When using the count parameter mothur assumes your list file contains only uniques.\n";
+ helpString += "The basis parameter allows you indicate what you want the summary file to represent, options are otu and sequence. Default is otu.\n";
helpString += "For example consider the following basis=sequence could give Clostridiales 3 105 16 43 46, where 105 is the total number of sequences whose otu classified to Clostridiales.\n";
helpString += "16 is the number of sequences in the otus from groupA, 43 is the number of sequences in the otus from groupB, and 46 is the number of sequences in the otus from groupC.\n";
helpString += "Now for basis=otu could give Clostridiales 3 7 6 1 2, where 7 is the number of otus that classified to Clostridiales.\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (groupfile == "not open") { abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
//check for optional parameter and set defaults
// ...at some point should added some additional type checking...
if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
- if (namefile == ""){
- vector<string> files; files.push_back(taxfile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == ""){
+ vector<string> files; files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
+ }
}
}
if (abort == true) { if (calledHelp) { return 0; } return 2; }
//if user gave a namesfile then use it
- if (namefile != "") { m->readNames(namefile, nameMap, true); }
+ if (namefile != "") { m->readNames(namefile, nameMap, true); }
+ if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); }
+ else { groupMap = NULL; }
+ if (countfile != "") { ct = new CountTable(); ct->readTable(countfile); }
+ else { ct = NULL; }
//read taxonomy file and save in map for easy access in building bin trees
m->readTax(taxfile, taxMap);
set<string> processedLabels;
set<string> userLabels = labels;
- if (m->control_pressed) { outputTypes.clear(); delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
m->mothurOut(list->getLabel() + "\t" + toString(list->size())); m->mothurOutEndLine();
process(list);
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete input; delete list; return 0; }
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
process(list);
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
processedLabels.insert(list->getLabel());
userLabels.erase(list->getLabel());
process(list);
delete list;
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete input; delete list; return 0; }
}
delete input;
+ if (groupMap != NULL) { delete groupMap; }
+ if (ct != NULL) { delete ct; }
if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (it == taxMap.end()) { //this name is not in taxonomy file, skip it
m->mothurOut(names[i] + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
}else{
+ if (countfile != "") {
+ int numDups = ct->getNumSeqs(names[i]);
+ for (int j = 0; j < numDups; j++) { phylo->addSeqToTree(names[i], it->second); }
+ size += numDups;
+ }else{
//add seq to tree
- phylo->addSeqToTree(names[i], it->second);
- size++;
- allNames.push_back(names[i]);
+ phylo->addSeqToTree(names[i], it->second);
+ size++;
+ }
+ allNames.push_back(names[i]);
}
}
if (outputDir == "") { outputDir += m->hasPath(listfile); }
ofstream out;
- string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("constaxonomy");
+ string outputFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("constaxonomy");
m->openOutputFile(outputFile, out);
outputNames.push_back(outputFile); outputTypes["constaxonomy"].push_back(outputFile);
ofstream outSum;
- string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + getOutputFileNameTag("taxsummary");
+ string outputSumFile = outputDir + m->getRootName(m->getSimpleName(listfile)) + processList->getLabel() + "." +getOutputFileNameTag("taxsummary");
m->openOutputFile(outputSumFile, outSum);
outputNames.push_back(outputSumFile); outputTypes["taxsummary"].push_back(outputSumFile);
out << "OTU\tSize\tTaxonomy" << endl;
PhyloSummary* taxaSum;
- if (refTaxonomy != "") {
- taxaSum = new PhyloSummary(refTaxonomy, groupfile);
+ if (countfile != "") {
+ if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
+ else { taxaSum = new PhyloSummary(ct); }
}else {
- taxaSum = new PhyloSummary(groupfile);
- }
-
+ if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
+ else { taxaSum = new PhyloSummary(groupMap); }
+ }
//for each bin in the list vector
string snumBins = toString(processList->getNumBins());
if (basis == "sequence") {
for(int j = 0; j < names.size(); j++) { taxaSum->addSeqToTree(names[j], noConfidenceConTax); }
}else { //otu
- taxaSum->addSeqToTree(noConfidenceConTax, names);
+ map<string, bool> containsGroup;
+ if (countfile != "") {
+ if (ct->hasGroupInfo()) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ for (int k = 0; k < names.size(); k++) {
+ vector<int> counts = ct->getGroupCounts(names[k]);
+ for (int h = 0; h < counts.size(); h++) {
+ if (counts[h] != 0) { containsGroup[mGroups[h]] = true; }
+ }
+ }
+ }
+ }else {
+ if (groupfile != "") {
+ vector<string> mGroups = groupMap->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) { containsGroup[mGroups[j]] = false; }
+
+ for (int k = 0; k < names.size(); k++) {
+ //find out the sequences group
+ string group = groupMap->getGroup(names[k]);
+
+ if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
+ else {
+ containsGroup[group] = true;
+ }
+ }
+ }
+ }
+ taxaSum->addSeqToTree(noConfidenceConTax, containsGroup);
}
}
#include "command.hpp"
#include "listvector.hpp"
#include "inputdata.h"
+#include "counttable.h"
class ClassifyOtuCommand : public Command {
void help() { m->mothurOut(getHelpString()); }
private:
-
+ GroupMap* groupMap;
+ CountTable* ct;
ListVector* list;
InputData* input;
- string listfile, namefile, taxfile, label, outputDir, refTaxonomy, groupfile, basis;
+ string listfile, namefile, taxfile, label, outputDir, refTaxonomy, groupfile, basis, countfile;
bool abort, allLines, probs;
int cutoff;
set<string> labels; //holds labels to be used
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptemplate);
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
+
CommandParameter psearch("search", "Multiple", "kmer-blast-suffix-distance", "kmer", "", "", "",false,false); parameters.push_back(psearch);
CommandParameter pksize("ksize", "Number", "", "8", "", "", "",false,false); parameters.push_back(pksize);
CommandParameter pmethod("method", "Multiple", "bayesian-knn", "bayesian", "", "", "",false,false); parameters.push_back(pmethod);
try {
string helpString = "";
helpString += "The classify.seqs command reads a fasta file containing sequences and creates a .taxonomy file and a .tax.summary file.\n";
- helpString += "The classify.seqs command parameters are reference, fasta, name, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
+ helpString += "The classify.seqs command parameters are reference, fasta, name, group, count, search, ksize, method, taxonomy, processors, match, mismatch, gapopen, gapextend, numwanted and probs.\n";
helpString += "The reference, fasta and taxonomy parameters are required. You may enter multiple fasta files by separating their names with dashes. ie. fasta=abrecovery.fasta-amzon.fasta \n";
helpString += "The search parameter allows you to specify the method to find most similar template. Your options are: suffix, kmer, blast and distance. The default is kmer.\n";
helpString += "The name parameter allows you add a names file with your fasta file, if you enter multiple fasta files, you must enter matching names files for them.\n";
helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
+ helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
helpString += "The method parameter allows you to specify classification method to use. Your options are: bayesian and knn. The default is bayesian.\n";
helpString += "The ksize parameter allows you to specify the kmer size for finding most similar template to candidate. The default is 8.\n";
helpString += "The processors parameter allows you to specify the number of processors to use. The default is 1.\n";
ClassifySeqsCommand::ClassifySeqsCommand(string option) {
try {
abort = false; calledHelp = false;
- rdb = ReferenceDB::getInstance();
+ rdb = ReferenceDB::getInstance(); hasName = false; hasCount=false;
//allow user to run help
if(option == "help") { help(); abort = true; calledHelp = true; }
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
fastaFileName = validParameter.validFile(parameters, "fasta", false);
}
}
}
-
+
+ if (namefileNames.size() != 0) { hasName = true; }
+
if (namefile != "") {
if (namefileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a name file, you must have one for each fasta file."); m->mothurOutEndLine(); }
}
+ //check for required parameters
+ countfile = validParameter.validFile(parameters, "count", false);
+ if (countfile == "not found") {
+ countfile = "";
+ }else {
+ m->splitAtDash(countfile, countfileNames);
+
+ //go through files and make sure they are good, if not, then disregard them
+ for (int i = 0; i < countfileNames.size(); i++) {
+
+ bool ignore = false;
+ if (countfileNames[i] == "current") {
+ countfileNames[i] = m->getCountTableFile();
+ if (countfileNames[i] != "") { m->mothurOut("Using " + countfileNames[i] + " as input file for the count parameter where you had given current."); m->mothurOutEndLine(); }
+ else {
+ m->mothurOut("You have no current count file, ignoring current."); m->mothurOutEndLine(); ignore=true;
+ //erase from file list
+ countfileNames.erase(countfileNames.begin()+i);
+ i--;
+ }
+ }
+
+ if (!ignore) {
+
+ if (inputDir != "") {
+ string path = m->hasPath(countfileNames[i]);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { countfileNames[i] = inputDir + countfileNames[i]; }
+ }
+
+ int ableToOpen;
+ ifstream in;
+
+ ableToOpen = m->openInputFile(countfileNames[i], in, "noerror");
+
+ //if you can't open it, try default location
+ if (ableToOpen == 1) {
+ if (m->getDefaultPath() != "") { //default path is set
+ string tryPath = m->getDefaultPath() + m->getSimpleName(countfileNames[i]);
+ m->mothurOut("Unable to open " + countfileNames[i] + ". Trying default " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ countfileNames[i] = tryPath;
+ }
+ }
+
+ if (ableToOpen == 1) {
+ if (m->getOutputDir() != "") { //default path is set
+ string tryPath = m->getOutputDir() + m->getSimpleName(countfileNames[i]);
+ m->mothurOut("Unable to open " + countfileNames[i] + ". Trying output directory " + tryPath); m->mothurOutEndLine();
+ ifstream in2;
+ ableToOpen = m->openInputFile(tryPath, in2, "noerror");
+ in2.close();
+ countfileNames[i] = tryPath;
+ }
+ }
+
+ in.close();
+
+ if (ableToOpen == 1) {
+ m->mothurOut("Unable to open " + countfileNames[i] + ". It will be disregarded."); m->mothurOutEndLine();
+ //erase from file list
+ countfileNames.erase(countfileNames.begin()+i);
+ i--;
+ }else {
+ m->setCountTableFile(countfileNames[i]);
+ }
+ }
+ }
+ }
+
+ if (countfileNames.size() != 0) { hasCount = true; if (countfileNames.size() != fastaFileNames.size()) {m->mothurOut("If you provide a count file, you must have one for each fasta file."); m->mothurOutEndLine(); } }
+
+ //make sure there is at least one valid file left
+ if (hasName && hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
groupfile = validParameter.validFile(parameters, "group", false);
if (groupfile == "not found") { groupfile = ""; }
else {
if (groupfile != "") {
if (groupfileNames.size() != fastaFileNames.size()) { abort = true; m->mothurOut("If you provide a group file, you must have one for each fasta file."); m->mothurOutEndLine(); }
+ if (hasCount) { m->mothurOut("[ERROR]: You must enter ONLY ONE of the following: count or group."); m->mothurOutEndLine(); abort = true; }
}else {
for (int i = 0; i < fastaFileNames.size(); i++) { groupfileNames.push_back(""); }
}
}
if (!abort) {
- if (namefileNames.size() == 0){
- if (fastaFileNames.size() != 0) {
- vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]);
- parser.getNameFile(files);
+ if (!hasCount) {
+ if (namefileNames.size() == 0){
+ if (fastaFileNames.size() != 0) {
+ vector<string> files; files.push_back(fastaFileNames[fastaFileNames.size()-1]);
+ parser.getNameFile(files);
+ }
}
}
}
}
#endif
- string group = "";
- if (groupfile != "") { group = groupfileNames[s]; }
-
- PhyloSummary taxaSum(baseTName, group);
-
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
-
- if (namefile == "") { taxaSum.summarize(tempTaxonomyFile); }
- else {
- ifstream in;
- m->openInputFile(tempTaxonomyFile, in);
-
- //read in users taxonomy file and add sequences to tree
- string name, taxon;
-
- while(!in.eof()){
- in >> name >> taxon; m->gobble(in);
-
- itNames = nameMap.find(name);
-
- if (itNames == nameMap.end()) {
- m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
- }else{
- for (int i = 0; i < itNames->second.size(); i++) {
- taxaSum.addSeqToTree(itNames->second[i], taxon); //add it as many times as there are identical seqs
- }
- itNames->second.clear();
- nameMap.erase(itNames->first);
- }
- }
- in.close();
- }
+ string group = "";
+ GroupMap* groupMap = NULL;
+ CountTable* ct = NULL;
+ PhyloSummary* taxaSum;
+ if (hasCount) {
+ ct = new CountTable();
+ ct->readTable(countfileNames[s]);
+ taxaSum = new PhyloSummary(baseTName, ct);
+ taxaSum->summarize(tempTaxonomyFile);
+ }else {
+ if (groupfile != "") { group = groupfileNames[s]; groupMap = new GroupMap(group); groupMap->readMap(); }
+
+ taxaSum = new PhyloSummary(baseTName, groupMap);
+
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
+
+ if (namefile == "") { taxaSum->summarize(tempTaxonomyFile); }
+ else {
+ ifstream in;
+ m->openInputFile(tempTaxonomyFile, in);
+
+ //read in users taxonomy file and add sequences to tree
+ string name, taxon;
+
+ while(!in.eof()){
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
+
+ in >> name >> taxon; m->gobble(in);
+
+ itNames = nameMap.find(name);
+
+ if (itNames == nameMap.end()) {
+ m->mothurOut(name + " is not in your name file please correct."); m->mothurOutEndLine(); exit(1);
+ }else{
+ for (int i = 0; i < itNames->second.size(); i++) {
+ taxaSum->addSeqToTree(itNames->second[i], taxon); //add it as many times as there are identical seqs
+ }
+ itNames->second.clear();
+ nameMap.erase(itNames->first);
+ }
+ }
+ in.close();
+ }
+ }
m->mothurRemove(tempTaxonomyFile);
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } delete classify; return 0; }
//print summary file
ofstream outTaxTree;
m->openOutputFile(taxSummary, outTaxTree);
- taxaSum.print(outTaxTree);
+ taxaSum->print(outTaxTree);
outTaxTree.close();
//output taxonomy with the unclassified bins added
m->openOutputFile(unclass, outTax);
//get maxLevel from phylotree so you know how many 'unclassified's to add
- int maxLevel = taxaSum.getMaxLevel();
+ int maxLevel = taxaSum->getMaxLevel();
//read taxfile - this reading and rewriting is done to preserve the confidence scores.
string name, taxon;
while (!inTax.eof()) {
- if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->mothurRemove(unclass); delete classify; return 0; }
+ if (m->control_pressed) { outputTypes.clear(); if (ct != NULL) { delete ct; } if (groupMap != NULL) { delete groupMap; } delete taxaSum; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->mothurRemove(unclass); delete classify; return 0; }
inTax >> name >> taxon; m->gobble(inTax);
inTax.close();
outTax.close();
+ if (ct != NULL) { delete ct; }
+ if (groupMap != NULL) { delete groupMap; } delete taxaSum;
m->mothurRemove(newTaxonomyFile);
rename(unclass.c_str(), newTaxonomyFile.c_str());
vector<linePair*> lines;
vector<string> fastaFileNames;
vector<string> namefileNames;
+ vector<string> countfileNames;
vector<string> groupfileNames;
vector<string> outputNames;
map<string, vector<string> > nameMap;
Classify* classify;
ReferenceDB* rdb;
- string fastaFileName, templateFileName, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
+ string fastaFileName, templateFileName, countfile, distanceFileName, namefile, search, method, taxonomyFileName, outputDir, groupfile;
int processors, kmerSize, numWanted, cutoff, iters;
float match, misMatch, gapOpen, gapExtend;
- bool abort, probs, save, flip;
+ bool abort, probs, save, flip, hasName, hasCount;
int driver(linePair*, string, string, string, string);
int createProcesses(string, string, string, string);
try {
CommandParameter ptree("tree", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptree);
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "", "", "none",false,true); parameters.push_back(ptaxonomy);
- CommandParameter pname("name", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "", "", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pcutoff("cutoff", "Number", "", "51", "", "", "",false,true); parameters.push_back(pcutoff);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
helpString += "The classify.tree command reads a tree and taxonomy file and output the consensus taxonomy for each node on the tree. \n";
helpString += "If you provide a group file, the concensus for each group will also be provided. \n";
helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n";
+ helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
helpString += "The summary file lists the concensus taxonomy for the descendants of each node.\n";
- helpString += "The classify.tree command parameters are tree, group, name and taxonomy. The tree and taxonomy files are required.\n";
+ helpString += "The classify.tree command parameters are tree, group, name, count and taxonomy. The tree and taxonomy files are required.\n";
helpString += "The cutoff parameter allows you to specify a consensus confidence threshold for your taxonomy. The default is 51, meaning 51%. Cutoff cannot be below 51.\n";
helpString += "The classify.tree command should be used in the following format: classify.tree(tree=test.tre, group=test.group, taxonomy=test.taxonomy)\n";
helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
string temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "51"; }
m->mothurConvert(temp, cutoff);
if ((cutoff < 51) || (cutoff > 100)) { m->mothurOut("cutoff must be above 50, and no greater than 100."); m->mothurOutEndLine(); abort = true; }
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
}
-
}
}
catch(exception& e) {
TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
vector<Tree*> T = reader->getTrees();
- TreeMap* tmap = T[0]->getTreeMap();
+ CountTable* tmap = T[0]->getCountTable();
Tree* outputTree = T[0];
delete reader;
if (itTax == taxMap.end()) { //this name is not in taxonomy file, skip it
m->mothurOut((*it) + " is not in your taxonomy file. I will not include it in the consensus."); m->mothurOutEndLine();
}else{
- //add seq to tree
- phylo->addSeqToTree((*it), itTax->second);
- size++;
- }
+ if (countfile != "") {
+ int numDups = ct->getNumSeqs((*it));
+ for (int j = 0; j < numDups; j++) { phylo->addSeqToTree((*it), itTax->second); }
+ size += numDups;
+ }else{
+ //add seq to tree
+ phylo->addSeqToTree((*it), itTax->second);
+ size++;
+ } }
}
if (m->control_pressed) { delete phylo; return conTax; }
int lc = T->tree[i].getLChild();
int rc = T->tree[i].getRChild();
- TreeMap* tmap = T->getTreeMap();
+ // TreeMap* tmap = T->getTreeMap();
if (lc == -1) { //you are a leaf your only descendant is yourself
- string group = tmap->getGroup(T->tree[i].getName());
+ vector<string> groups = T->tree[i].getGroup();
set<string> mynames; mynames.insert(T->tree[i].getName());
- names[group] = mynames; //mygroup -> me
+ for (int j = 0; j < groups.size(); j++) { names[groups[j]] = mynames; } //mygroup -> me
names["AllGroups"] = mynames;
}else{ //your descedants are the combination of your childrens descendants
names = descendants[lc];
#include "command.hpp"
#include "readtree.h"
#include "treemap.h"
+#include "counttable.h"
class ClassifyTreeCommand : public Command {
public:
void help() { m->mothurOut(getHelpString()); }
private:
- string treefile, taxonomyfile, groupfile, namefile, outputDir;
+ string treefile, taxonomyfile, groupfile, namefile, countfile, outputDir;
bool abort;
vector<string> outputNames;
int numUniquesInName, cutoff;
map<string, string> nameMap;
map<string, int> nameCount;
map<string, string> taxMap;
+ CountTable* ct;
int getClassifications(Tree*&);
map<string, set<string> > getDescendantList(Tree*&, int, map<int, map<string, set<string> > >);
vector<string> ClusterFragmentsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pdiffs("diffs", "Number", "", "0", "", "", "",false,false); parameters.push_back(pdiffs);
CommandParameter ppercent("percent", "Number", "", "0", "", "", "",false,false); parameters.push_back(ppercent);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
try {
string helpString = "";
helpString += "The cluster.fragments command groups sequences that are part of a larger sequence.\n";
- helpString += "The cluster.fragments command outputs a new fasta and name file.\n";
- helpString += "The cluster.fragments command parameters are fasta, name, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
+ helpString += "The cluster.fragments command outputs a new fasta and name or count file.\n";
+ helpString += "The cluster.fragments command parameters are fasta, name, count, diffs and percent. The fasta parameter is required, unless you have a valid current file. \n";
helpString += "The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n";
helpString += "The diffs parameter allows you to set the number of differences allowed, default=0. \n";
helpString += "The percent parameter allows you to set percentage of differences allowed, default=0. percent=2 means if the number of difference is less than or equal to two percent of the length of the fragment, then cluster.\n";
else {
if (type == "fasta") { outputFileName = "fragclust.fasta"; }
else if (type == "name") { outputFileName = "fragclust.names"; }
+ else if (type == "count") { outputFileName = "fragclust.count.table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand");
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (namefile == "not found") { namefile = ""; }
else if (namefile == "not open") { namefile = ""; abort = true; }
else { readNameFile(); m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { ct.readTable(countfile); m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("When executing a cluster.fragments command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
string temp;
temp = validParameter.validFile(parameters, "diffs", false); if (temp == "not found"){ temp = "0"; }
temp = validParameter.validFile(parameters, "percent", false); if (temp == "not found"){ temp = "0"; }
m->mothurConvert(temp, percent);
- if (namefile == "") {
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
string jBases = alignSeqs[j].seq.getUnaligned();
if (isFragment(iBases, jBases)) {
- //merge
- alignSeqs[i].names += ',' + alignSeqs[j].names;
- alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
-
+ if (countfile != "") {
+ ct.mergeCounts(alignSeqs[i].names, alignSeqs[j].names);
+ }else {
+ //merge
+ alignSeqs[i].names += ',' + alignSeqs[j].names;
+ alignSeqs[i].numIdentical += alignSeqs[j].numIdentical;
+ }
alignSeqs[j].active = 0;
alignSeqs[j].numIdentical = 0;
count++;
string newFastaFile = fileroot + getOutputFileNameTag("fasta");
string newNamesFile = fileroot + getOutputFileNameTag("name");
+ if (countfile != "") { newNamesFile = fileroot + getOutputFileNameTag("count"); }
if (m->control_pressed) { return 0; }
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
return 0;
else{
seqRNode tempNode(itSize->second, seq, names[seq.getName()], seq.getUnaligned().length());
alignSeqs.push_back(tempNode);
- }
+ }
+ }else if(countfile != "") {
+ seqRNode tempNode(ct.getNumSeqs(seq.getName()), seq, seq.getName(), seq.getUnaligned().length());
+ alignSeqs.push_back(tempNode);
}else { //no names file, you are identical to yourself
seqRNode tempNode(1, seq, seq.getName(), seq.getUnaligned().length());
alignSeqs.push_back(tempNode);
ofstream outNames;
m->openOutputFile(newfasta, outFasta);
- m->openOutputFile(newname, outNames);
+ if (countfile == "") { m->openOutputFile(newname, outNames); }
for (int i = 0; i < alignSeqs.size(); i++) {
if (alignSeqs[i].numIdentical != 0) {
alignSeqs[i].seq.printSequence(outFasta);
- outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl;
+ if (countfile == "") { outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; }
}
}
outFasta.close();
- outNames.close();
+ if (countfile == "") { outNames.close(); }
+ else { ct.printTable(newname); }
}
catch(exception& e) {
m->errorOut(e, "ClusterFragmentsCommand", "printData");
exit(1);
}
}
-
/**************************************************************************************************/
#include "command.hpp"
#include "sequence.hpp"
+#include "counttable.h"
/************************************************************/
struct seqRNode {
void help() { m->mothurOut(getHelpString()); }
private:
+ CountTable ct;
bool abort;
- string fastafile, namefile, outputDir;
+ string fastafile, namefile, countfile, outputDir;
int diffs, percent;
vector<seqRNode> alignSeqs;
map<string, string> names; //represents the names file first column maps to second column
if (m->control_pressed) { return 0; }
- consensusTree = new Tree(t[0]->getTreeMap());
+ consensusTree = new Tree(t[0]->getCountTable());
it2 = nodePairs.find(treeSet);
if (m->control_pressed) { delete consensusTree; return 0; }
- map<string, string> empty;
- consensusTree->assembleTree(empty);
+ consensusTree->assembleTree();
if (m->control_pressed) { delete consensusTree; return 0; }
vector<string> ConsensusSeqsCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(plist);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pcutoff("cutoff", "Number", "", "100", "", "", "",false,false); parameters.push_back(pcutoff);
try {
string helpString = "";
helpString += "The consensus.seqs command can be used in 2 ways: create a consensus sequence from a fastafile, or with a listfile create a consensus sequence for each otu. Sequences must be aligned.\n";
- helpString += "The consensus.seqs command parameters are fasta, list, name, cutoff and label.\n";
+ helpString += "The consensus.seqs command parameters are fasta, list, name, count, cutoff and label.\n";
helpString += "The fasta parameter allows you to enter the fasta file containing your sequences, and is required, unless you have a valid current fasta file. \n";
helpString += "The list parameter allows you to enter a your list file. \n";
helpString += "The name parameter allows you to enter a names file associated with the fasta file. \n";
else {
if (type == "fasta") { outputFileName = "cons.fasta"; }
else if (type == "name") { outputFileName = "cons.names"; }
+ else if (type == "count") { outputFileName = "cons.count.table"; }
else if (type == "summary") { outputFileName = "cons.summary"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["summary"] = tempOutNames;
}
catch(exception& e) {
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
outputTypes["summary"] = tempOutNames;
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["list"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((countfile != "") && (namefile != "")) { m->mothurOut("You must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
listfile = validParameter.validFile(parameters, "list", true);
if (listfile == "not open") { abort = true; }
else if (listfile == "not found") { listfile = ""; }
//if the user changes the output directory command factory will send this info to us in the output parameter
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); }
- if (namefile == ""){
- vector<string> files; files.push_back(fastafile);
- parser.getNameFile(files);
- }
+ if (countfile == "") {
+ if (namefile == ""){
+ vector<string> files; files.push_back(fastafile);
+ parser.getNameFile(files);
+ }
+ }
}
}
catch(exception& e) {
if (m->control_pressed) { return 0; }
if (namefile != "") { readNames(); }
+ if (countfile != "") { ct.readTable(countfile); }
if (m->control_pressed) { return 0; }
string outputFastaFile = outputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta");
m->openOutputFile(outputFastaFile, outFasta);
outputNames.push_back(outputFastaFile); outputTypes["fasta"].push_back(outputFastaFile);
-
- vector<string> seqs;
- int seqLength = 0;
- for (map<string, string>::iterator it = nameMap.begin(); it != nameMap.end(); it++) {
-
- if (m->control_pressed) { outSummary.close(); outFasta.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
-
- string seq = fastaMap[it->second];
- seqs.push_back(seq);
-
- if (seqLength == 0) { seqLength = seq.length(); }
- else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
-
- }
-
+
vector< vector<float> > percentages; percentages.resize(5);
for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
string consSeq = "";
+ int thisCount;
//get counts
for (int j = 0; j < seqLength; j++) {
vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
int numDots = 0;
-
- for (int i = 0; i < seqs.size(); i++) {
+ thisCount = 0;
+ for (map<string, string>::iterator it = fastaMap.begin(); it != fastaMap.end(); it++) {
- if (seqs[i][j] == '.') { numDots++; }
-
- char base = toupper(seqs[i][j]);
- if (base == 'A') { counts[0]++; }
- else if (base == 'T') { counts[1]++; }
- else if (base == 'G') { counts[2]++; }
- else if (base == 'C') { counts[3]++; }
- else { counts[4]++; }
+ string thisSeq = it->second;
+ int size = 0;
+
+ if (countfile != "") { size = ct.getNumSeqs(it->first); }
+ else {
+ map<string, int>::iterator itCount = nameFileMap.find(it->first);
+ if (itCount != nameFileMap.end()) {
+ size = itCount->second;
+ }else { m->mothurOut("[ERROR]: file mismatch, aborting.\n"); m->control_pressed = true; break; }
+ }
+
+ for (int k = 0; k < size; k++) {
+ if (thisSeq[j] == '.') { numDots++; }
+
+ char base = toupper(thisSeq[j]);
+ if (base == 'A') { counts[0]++; }
+ else if (base == 'T') { counts[1]++; }
+ else if (base == 'G') { counts[2]++; }
+ else if (base == 'C') { counts[3]++; }
+ else { counts[4]++; }
+ thisCount++;
+ }
}
char conBase = '.';
- if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
+ if (numDots != thisCount) { conBase = getBase(counts, thisCount); }
consSeq += conBase;
- percentages[0][j] = counts[0] / (float) seqs.size();
- percentages[1][j] = counts[1] / (float) seqs.size();
- percentages[2][j] = counts[2] / (float) seqs.size();
- percentages[3][j] = counts[3] / (float) seqs.size();
- percentages[4][j] = counts[4] / (float) seqs.size();
-
+ percentages[0][j] = counts[0] / (float) thisCount;
+ percentages[1][j] = counts[1] / (float) thisCount;
+ percentages[2][j] = counts[2] / (float) thisCount;
+ percentages[3][j] = counts[3] / (float) thisCount;
+ percentages[4][j] = counts[4] / (float) thisCount;
}
for (int j = 0; j < seqLength; j++) {
- outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
+ outSummary << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << thisCount << '\t' << consSeq[j] << endl;
}
outFasta << ">conseq" << endl << consSeq << endl;
outSummary.close(); outFasta.close();
-
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
}else {
if (m->control_pressed) { outSummary.close(); outName.close(); outFasta.close(); return 0; }
string bin = list->get(i);
-
- string newName = "";
- string consSeq = getConsSeq(bin, outSummary, newName, i);
+ string consSeq = getConsSeq(bin, outSummary, i);
outFasta << ">seq" << (i+1) << endl << consSeq << endl;
- outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << newName << endl;
+ outName << "seq" << (i+1) << '\t' << "seq" << (i+1) << "," << bin << endl;
}
outSummary.close(); outName.close(); outFasta.close();
}
//***************************************************************************************************************
-//made this smart enough to owrk with unique or non unique list file
-string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, string& name, int binNumber){
+string ConsensusSeqsCommand::getConsSeq(string bin, ofstream& outSummary, int binNumber){
try{
string consSeq = "";
bool error = false;
-
- //the whole bin is the second column if no names file, otherwise build it
- name = bin;
- if (namefile != "") { name = ""; }
-
+ int totalSize=0;
+
vector<string> binNames;
m->splitAtComma(bin, binNames);
-
- //get sequence strings for each name in the bin
- vector<string> seqs;
-
- set<string> addedAlready;
- int seqLength = 0;
- for (int i = 0; i < binNames.size(); i++) {
-
- map<string, string>::iterator it;
-
- it = nameMap.find(binNames[i]);
- if (it == nameMap.end()) {
- if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
- else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
- break;
- }else {
-
- //add sequence string to seqs vector to process below
- string seq = fastaMap[it->second];
- seqs.push_back(seq);
-
- if (seqLength == 0) { seqLength = seq.length(); }
- else if (seqLength != seq.length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); error = true; break; }
-
- if (namefile != "") {
- //did we add this line from name file already?
- if (addedAlready.count(it->second) == 0) {
- name += "," + nameFileMap[it->second];
- addedAlready.insert(it->second);
- }
- }
-
- }
- }
-
- if (error) { m->control_pressed = true; return consSeq; }
-
- if (namefile != "") { name = name.substr(1); }
-
- vector< vector<float> > percentages; percentages.resize(5);
+
+ vector< vector<float> > percentages; percentages.resize(5);
for (int j = 0; j < percentages.size(); j++) { percentages[j].resize(seqLength, 0.0); }
-
- //get counts
- for (int j = 0; j < seqLength; j++) {
-
- if (m->control_pressed) { return consSeq; }
-
- vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
- int numDots = 0;
-
- for (int i = 0; i < seqs.size(); i++) {
-
- if (seqs[i][j] == '.') { numDots++; }
-
- char base = toupper(seqs[i][j]);
- if (base == 'A') { counts[0]++; }
- else if (base == 'T') { counts[1]++; }
- else if (base == 'G') { counts[2]++; }
- else if (base == 'C') { counts[3]++; }
- else { counts[4]++; }
- }
-
- char conBase = '.';
- if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
-
- consSeq += conBase;
-
- percentages[0][j] = counts[0] / (float) seqs.size();
- percentages[1][j] = counts[1] / (float) seqs.size();
- percentages[2][j] = counts[2] / (float) seqs.size();
- percentages[3][j] = counts[3] / (float) seqs.size();
- percentages[4][j] = counts[4] / (float) seqs.size();
-
+
+ if (countfile != "") {
+ //get counts
+ for (int j = 0; j < seqLength; j++) {
+
+ if (m->control_pressed) { return consSeq; }
+
+ vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
+ int numDots = 0;
+ totalSize = 0;
+ for (int i = 0; i < binNames.size(); i++) {
+ if (m->control_pressed) { return consSeq; }
+
+ string thisSeq = "";
+ map<string, string>::iterator itFasta = fastaMap.find(binNames[i]);
+ if (itFasta != fastaMap.end()) {
+ thisSeq = itFasta->second;
+ }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+
+ int size = ct.getNumSeqs(binNames[i]);
+ if (size != 0) {
+ for (int k = 0; k < size; k++) {
+ if (thisSeq[j] == '.') { numDots++; }
+
+ char base = toupper(thisSeq[j]);
+ if (base == 'A') { counts[0]++; }
+ else if (base == 'T') { counts[1]++; }
+ else if (base == 'G') { counts[2]++; }
+ else if (base == 'C') { counts[3]++; }
+ else { counts[4]++; }
+ totalSize++;
+ }
+ }else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your count file, please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
+ }
+ char conBase = '.';
+ if (numDots != totalSize) { conBase = getBase(counts, totalSize); }
+
+ consSeq += conBase;
+
+ percentages[0][j] = counts[0] / (float) totalSize;
+ percentages[1][j] = counts[1] / (float) totalSize;
+ percentages[2][j] = counts[2] / (float) totalSize;
+ percentages[3][j] = counts[3] / (float) totalSize;
+ percentages[4][j] = counts[4] / (float) totalSize;
+ }
+
+ }else {
+
+ //get sequence strings for each name in the bin
+ vector<string> seqs;
+ for (int i = 0; i < binNames.size(); i++) {
+
+ map<string, string>::iterator it;
+ it = nameMap.find(binNames[i]);
+ if (it == nameMap.end()) {
+ if (namefile == "") { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta file, please correct."); m->mothurOutEndLine(); error = true; }
+ else { m->mothurOut("[ERROR]: " + binNames[i] + " is not in your fasta or name file, please correct."); m->mothurOutEndLine(); error = true; }
+ break;
+ }else {
+ //add sequence string to seqs vector to process below
+ map<string, string>::iterator itFasta = fastaMap.find(it->second);
+
+ if (itFasta != fastaMap.end()) {
+ string seq = itFasta->second;
+ seqs.push_back(seq);
+ }else { m->mothurOut("[ERROR]: file mismatch, aborting. \n"); }
+ }
+ }
+
+ if (error) { m->control_pressed = true; return consSeq; }
+ totalSize = seqs.size();
+ //get counts
+ for (int j = 0; j < seqLength; j++) {
+
+ if (m->control_pressed) { return consSeq; }
+
+ vector<int> counts; counts.resize(5, 0); //A,T,G,C,Gap
+ int numDots = 0;
+
+ for (int i = 0; i < seqs.size(); i++) {
+
+ if (seqs[i][j] == '.') { numDots++; }
+
+ char base = toupper(seqs[i][j]);
+ if (base == 'A') { counts[0]++; }
+ else if (base == 'T') { counts[1]++; }
+ else if (base == 'G') { counts[2]++; }
+ else if (base == 'C') { counts[3]++; }
+ else { counts[4]++; }
+ }
+
+ char conBase = '.';
+ if (numDots != seqs.size()) { conBase = getBase(counts, seqs.size()); }
+
+ consSeq += conBase;
+
+ percentages[0][j] = counts[0] / (float) seqs.size();
+ percentages[1][j] = counts[1] / (float) seqs.size();
+ percentages[2][j] = counts[2] / (float) seqs.size();
+ percentages[3][j] = counts[3] / (float) seqs.size();
+ percentages[4][j] = counts[4] / (float) seqs.size();
+
+ }
}
-
+
+
+
for (int j = 0; j < seqLength; j++) {
- outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << seqs.size() << '\t' << consSeq[j] << endl;
+ outSummary << (binNumber + 1) << '\t' << (j+1) << '\t' << percentages[0][j] << '\t'<< percentages[1][j] << '\t'<< percentages[2][j] << '\t' << percentages[3][j] << '\t' << percentages[4][j] << '\t' << totalSize << '\t' << consSeq[j] << endl;
}
return consSeq;
ifstream in;
m->openInputFile(fastafile, in);
-
+ seqLength = 0;
+
while (!in.eof()) {
if (m->control_pressed) { break; }
if (name != "") {
fastaMap[name] = seq.getAligned();
nameMap[name] = name; //set nameMap incase no names file
- nameFileMap[name] = name;
+ nameFileMap[name] = 1;
+
+ if (seqLength == 0) { seqLength = seq.getAligned().length(); }
+ else if (seqLength != seq.getAligned().length()) { m->mothurOut("[ERROR]: sequence are not the same length, please correct."); m->mothurOutEndLine(); m->control_pressed = true; break; }
}
}
it = nameMap.find(thisname);
if (it != nameMap.end()) { //then this sequence was in the fastafile
- nameFileMap[thisname] = repnames; //for later when outputting the new namesFile if the list file is unique
+ nameFileMap[thisname] = m->getNumNames(repnames); //for later when outputting the new namesFile if the list file is unique
vector<string> splitRepNames;
m->splitAtComma(repnames, splitRepNames);
#include "command.hpp"
#include "listvector.hpp"
+#include "counttable.h"
class ConsensusSeqsCommand : public Command {
public:
private:
+ CountTable ct;
bool abort, allLines;
- string fastafile, listfile, namefile, label, outputDir;
+ string fastafile, listfile, namefile, countfile, label, outputDir;
set<string> labels;
vector<string> outputNames;
map<string, string> fastaMap;
map<string, string> nameMap;
- map<string, string> nameFileMap;
- int cutoff;
+ map<string, int> nameFileMap;
+ int cutoff, seqLength;
int readFasta();
int readNames();
int processList(ListVector*&);
- string getConsSeq(string, ofstream&, string&, int);
+ string getConsSeq(string, ofstream&, int);
char getBase(vector<int>, int);
};
#include "countseqscommand.h"
#include "groupmap.h"
#include "sharedutilities.h"
+#include "counttable.h"
//**********************************************************************************************************************
vector<string> CountSeqsCommand::setParameters(){
int total = 0;
if (!large) { total = processSmall(outputFileName); }
else { total = processLarge(outputFileName); }
-
+
if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
//set rabund file as new current rabundfile
#include "counttable.h"
+/************************************************************/
+int CountTable::createTable(set<string>& n, map<string, string>& g, set<string>& gs) {
+ try {
+ int numGroups = 0;
+ groups.clear();
+ totalGroups.clear();
+ indexGroupMap.clear();
+ indexNameMap.clear();
+ counts.clear();
+ for (set<string>::iterator it = gs.begin(); it != gs.end(); it++) { groups.push_back(*it); hasGroups = true; }
+ numGroups = groups.size();
+ totalGroups.resize(numGroups, 0);
+
+ //sort groups to keep consistent with how we store the groups in groupmap
+ sort(groups.begin(), groups.end());
+ for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
+ m->setAllGroups(groups);
+
+ uniques = 0;
+ total = 0;
+ for (set<string>::iterator it = n.begin(); it != n.end(); it++) {
+
+ if (m->control_pressed) { break; }
+
+ string seqName = *it;
+
+ vector<int> groupCounts; groupCounts.resize(numGroups, 0);
+ map<string, string>::iterator itGroup = g.find(seqName);
+
+ if (itGroup != g.end()) {
+ groupCounts[indexGroupMap[itGroup->second]] = 1;
+ totalGroups[indexGroupMap[itGroup->second]]++;
+ }else { m->mothurOut("[ERROR]: Your group file does not contain " + seqName + ". Please correct."); m->mothurOutEndLine(); }
+
+ map<string, int>::iterator it2 = indexNameMap.find(seqName);
+ if (it2 == indexNameMap.end()) {
+ if (hasGroups) { counts.push_back(groupCounts); }
+ indexNameMap[seqName] = uniques;
+ totals.push_back(1);
+ total++;
+ uniques++;
+ }
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "createTable");
+ exit(1);
+ }
+}
/************************************************************/
bool CountTable::testGroups(string file) {
try {
}
}
/************************************************************/
+int CountTable::createTable(string namefile, string groupfile, bool createGroup) {
+ try {
+
+ if (namefile == "") { m->mothurOut("[ERROR]: namefile cannot be blank when creating a count table.\n"); m->control_pressed = true; }
+
+ GroupMap* groupMap;
+ int numGroups = 0;
+ groups.clear();
+ totalGroups.clear();
+ indexGroupMap.clear();
+ indexNameMap.clear();
+ counts.clear();
+ map<int, string> originalGroupIndexes;
+
+ if (groupfile != "") {
+ hasGroups = true;
+ groupMap = new GroupMap(groupfile); groupMap->readMap();
+ numGroups = groupMap->getNumGroups();
+ groups = groupMap->getNamesOfGroups();
+ totalGroups.resize(numGroups, 0);
+ }else if(createGroup) {
+ hasGroups = true;
+ numGroups = 1;
+ groups.push_back("Group1");
+ totalGroups.resize(numGroups, 0);
+ }
+ //sort groups to keep consistent with how we store the groups in groupmap
+ sort(groups.begin(), groups.end());
+ for (int i = 0; i < groups.size(); i++) { indexGroupMap[groups[i]] = i; }
+ m->setAllGroups(groups);
+
+ bool error = false;
+ string name;
+ uniques = 0;
+ total = 0;
+
+
+ //open input file
+ ifstream in;
+ m->openInputFile(namefile, in);
+
+ int total = 0;
+ while (!in.eof()) {
+ if (m->control_pressed) { break; }
+
+ string firstCol, secondCol;
+ in >> firstCol; m->gobble(in); in >> secondCol; m->gobble(in);
+
+ vector<string> names;
+ m->splitAtChar(secondCol, names, ',');
+
+ map<string, int> groupCounts;
+ int thisTotal = 0;
+ if (groupfile != "") {
+ //set to 0
+ for (int i = 0; i < groups.size(); i++) { groupCounts[groups[i]] = 0; }
+
+ //get counts for each of the users groups
+ for (int i = 0; i < names.size(); i++) {
+ string group = groupMap->getGroup(names[i]);
+
+ if (group == "not found") { m->mothurOut("[ERROR]: " + names[i] + " is not in your groupfile, please correct."); m->mothurOutEndLine(); error=true; }
+ else {
+ map<string, int>::iterator it = groupCounts.find(group);
+
+ //if not found, then this sequence is not from a group we care about
+ if (it != groupCounts.end()) {
+ it->second++;
+ thisTotal++;
+ }
+ }
+ }
+ }else if (createGroup) {
+ groupCounts["Group1"]=0;
+ for (int i = 0; i < names.size(); i++) {
+ string group = "Group1";
+ groupCounts["Group1"]++; thisTotal++;
+ }
+ }else { thisTotal = names.size(); }
+
+ //if group info, then read it
+ vector<int> thisGroupsCount; thisGroupsCount.resize(numGroups, 0);
+ for (int i = 0; i < numGroups; i++) {
+ thisGroupsCount[i] = groupCounts[groups[i]];
+ totalGroups[i] += thisGroupsCount[i];
+ }
+
+ map<string, int>::iterator it = indexNameMap.find(firstCol);
+ if (it == indexNameMap.end()) {
+ if (hasGroups) { counts.push_back(thisGroupsCount); }
+ indexNameMap[firstCol] = uniques;
+ totals.push_back(thisTotal);
+ total += thisTotal;
+ uniques++;
+ }else {
+ error = true;
+ m->mothurOut("[ERROR]: Your count table contains more than 1 sequence named " + firstCol + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();
+ }
+ }
+ in.close();
+
+ if (error) { m->control_pressed = true; }
+ if (groupfile != "") { delete groupMap; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "createTable");
+ exit(1);
+ }
+}
+/************************************************************/
int CountTable::readTable(string file) {
try {
filename = file;
}
}
/************************************************************/
+int CountTable::printTable(string file) {
+ try {
+ ofstream out;
+ m->openOutputFile(file, out);
+ out << "Representative_Sequence\ttotal\t";
+ for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+ out << endl;
+
+ for (map<string, int>::iterator itNames = indexNameMap.begin(); itNames != indexNameMap.end(); itNames++) {
+ out << itNames->first << '\t' << totals[itNames->second] << '\t';
+ if (hasGroups) {
+
+ for (int i = 0; i < groups.size(); i++) {
+ out << counts[itNames->second][i] << '\t';
+ }
+ }
+ out << endl;
+ }
+ out.close();
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "printTable");
+ exit(1);
+ }
+}
+/************************************************************/
+int CountTable::printHeaders(ofstream& out) {
+ try {
+ out << "Representative_Sequence\ttotal\t";
+ for (int i = 0; i < groups.size(); i++) { out << groups[i] << '\t'; }
+ out << endl;
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "printHeaders");
+ exit(1);
+ }
+}
+/************************************************************/
+int CountTable::printSeq(ofstream& out, string seqName) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(seqName);
+ if (it == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ out << it->first << '\t' << totals[it->second] << '\t';
+ if (hasGroups) {
+ for (int i = 0; i < groups.size(); i++) {
+ out << counts[it->second][i] << '\t';
+ }
+ }
+ out << endl;
+ }
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "printSeq");
+ exit(1);
+ }
+}
+/************************************************************/
//group counts for a seq
vector<int> CountTable::getGroupCounts(string seqName) {
try {
exit(1);
}
}
+/************************************************************/
+//set the number of sequences for the seq for the group
+int CountTable::setAbund(string seqName, string groupName, int num) {
+ try {
+ if (hasGroups) {
+ map<string, int>::iterator it = indexGroupMap.find(groupName);
+ if (it == indexGroupMap.end()) {
+ m->mothurOut("[ERROR]: " + groupName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ map<string, int>::iterator it2 = indexNameMap.find(seqName);
+ if (it2 == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + seqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ int oldCount = counts[it2->second][it->second];
+ counts[it2->second][it->second] = num;
+ totalGroups[it->second] += (num - oldCount);
+ total += (num - oldCount);
+ totals[it2->second] += (num - oldCount);
+ }
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "set");
+ exit(1);
+ }
+}
+/************************************************************/
+//add group
+int CountTable::addGroup(string groupName) {
+ try {
+ bool sanity = m->inUsersGroups(groupName, groups);
+ if (sanity) { m->mothurOut("[ERROR]: " + groupName + " is already in the count table, cannot add again.\n"); m->control_pressed = true; return 0; }
+
+ groups.push_back(groupName);
+ if (!hasGroups) { counts.resize(uniques); }
+
+ for (int i = 0; i < counts.size(); i++) { counts[i].push_back(0); }
+ totalGroups.push_back(0);
+ indexGroupMap[groupName] = groups.size()-1;
+ map<string, int> originalGroupMap = indexGroupMap;
+
+ //important to play well with others, :)
+ sort(groups.begin(), groups.end());
+
+ //fix indexGroupMap && totalGroups
+ vector<int> newTotals; newTotals.resize(groups.size(), 0);
+ for (int i = 0; i < groups.size(); i++) {
+ indexGroupMap[groups[i]] = i;
+ //find original spot of group[i]
+ int index = originalGroupMap[groups[i]];
+ newTotals[i] = totalGroups[index];
+ }
+ totalGroups = newTotals;
+
+ //fix counts vectors
+ for (int i = 0; i < counts.size(); i++) {
+ vector<int> newCounts; newCounts.resize(groups.size(), 0);
+ for (int j = 0; j < groups.size(); j++) {
+ //find original spot of group[i]
+ int index = originalGroupMap[groups[j]];
+ newCounts[j] = counts[i][index];
+ }
+ counts[i] = newCounts;
+ }
+ hasGroups = true;
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "addGroup");
+ exit(1);
+ }
+}
+/************************************************************/
+//vector of groups for the seq
+vector<string> CountTable::getGroups(string seqName) {
+ try {
+ vector<string> thisGroups;
+ if (hasGroups) {
+ vector<int> thisCounts = getGroupCounts(seqName);
+ for (int i = 0; i < thisCounts.size(); i++) {
+ if (thisCounts[i] != 0) { thisGroups.push_back(groups[i]); }
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return thisGroups;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getGroups");
+ exit(1);
+ }
+}
+/************************************************************/
+//total number of seqs represented by seq
+int CountTable::renameSeq(string oldSeqName, string newSeqName) {
+ try {
+
+ map<string, int>::iterator it = indexNameMap.find(oldSeqName);
+ if (it == indexNameMap.end()) {
+ m->mothurOut("[ERROR]: " + oldSeqName + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ int index = it->second;
+ indexNameMap.erase(it);
+ indexNameMap[newSeqName] = index;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "renameSeq");
+ exit(1);
+ }
+}
+
/************************************************************/
//total number of seqs represented by seq
int CountTable::getNumSeqs(string seqName) {
}
}
/************************************************************/
+//remove sequence
+int CountTable::remove(string seqName) {
+ try {
+ map<string, int>::iterator it = indexNameMap.find(seqName);
+ if (it == indexNameMap.end()) {
+ uniques--;
+ if (hasGroups){ //remove this sequences counts from group totals
+ for (int i = 0; i < totalGroups.size(); i++) { totalGroups[i] -= counts[it->second][i]; counts[it->second][i] = 0; }
+ }
+ int thisTotal = totals[it->second]; totals[it->second] = 0;
+ total -= thisTotal;
+ indexNameMap.erase(it);
+ }else {
+ m->mothurOut("[ERROR]: Your count table contains does not include " + seqName + ", cannot remove."); m->mothurOutEndLine(); m->control_pressed = true;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "push_back");
+ exit(1);
+ }
+}
+/************************************************************/
//add seqeunce without group info
int CountTable::push_back(string seqName, int thisTotal) {
try {
if ((hasGroups) && (groupCounts.size() != getNumGroups())) { m->mothurOut("[ERROR]: Your count table has a " + toString(getNumGroups()) + " groups and " + seqName + " has " + toString(groupCounts.size()) + ", please correct."); m->mothurOutEndLine(); m->control_pressed = true; }
int thisTotal = 0;
for (int i = 0; i < getNumGroups(); i++) { totalGroups[i] += groupCounts[i]; thisTotal += groupCounts[i]; }
+ if (hasGroups) { counts.push_back(groupCounts); }
indexNameMap[seqName] = uniques;
totals.push_back(thisTotal);
total+= thisTotal;
}
}
/************************************************************/
-//returns names of seqs
+//returns the names of all unique sequences in file
+vector<string> CountTable::getNamesOfSeqs(string group) {
+ try {
+ vector<string> names;
+ if (hasGroups) {
+ map<string, int>::iterator it = indexGroupMap.find(group);
+ if (it == indexGroupMap.end()) {
+ m->mothurOut("[ERROR]: " + group + " is not in your count table. Please correct.\n"); m->control_pressed = true;
+ }else {
+ for (map<string, int>::iterator it2 = indexNameMap.begin(); it2 != indexNameMap.end(); it2++) {
+ if (counts[it2->second][it->second] != 0) { names.push_back(it2->first); }
+ }
+ }
+ }else{ m->mothurOut("[ERROR]: Your count table does not have group info. Please correct.\n"); m->control_pressed = true; }
+
+ return names;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "getNamesOfSeqs");
+ exit(1);
+ }
+}
+/************************************************************/
+//merges counts of seq1 and seq2, saving in seq1
int CountTable::mergeCounts(string seq1, string seq2) {
try {
map<string, int>::iterator it = indexNameMap.find(seq1);
m->mothurOut("[ERROR]: " + seq2 + " is not in your count table. Please correct.\n"); m->control_pressed = true;
}else {
//merge data
- for (int i = 0; i < groups.size(); i++) {
- counts[it->second][i] += counts[it2->second][i];
- counts[it2->second][i] = 0;
- }
+ for (int i = 0; i < groups.size(); i++) { counts[it->second][i] += counts[it2->second][i]; }
totals[it->second] += totals[it2->second];
- totals[it2->second] = 0;
uniques--;
indexNameMap.erase(it2);
}
}
-
return 0;
}
catch(exception& e) {
exit(1);
}
}
+/************************************************************/
+int CountTable::copy(CountTable* ct) {
+ try {
+ vector<string> thisGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < thisGroups.size(); i++) { addGroup(thisGroups[i]); }
+ vector<string> names = ct->getNamesOfSeqs();
+
+ for (int i = 0; i < names.size(); i++) {
+ vector<int> thisCounts = ct->getGroupCounts(names[i]);
+ push_back(names[i], thisCounts);
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CountTable", "copy");
+ exit(1);
+ }
+}
/************************************************************/
#include "mothurout.h"
#include "listvector.hpp"
+#include "groupmap.h"
class CountTable {
public:
- CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; }
+ CountTable() { m = MothurOut::getInstance(); hasGroups = false; total = 0; uniques = 0; }
~CountTable() {}
- int readTable(string);
+ int createTable(set<string>&, map<string, string>&, set<string>&); //seqNames, seqName->group, groupNames
+ int createTable(string, string, bool); //namefile, groupfile, createGroup
+ int readTable(string);
+ int printTable(string);
+ int printHeaders(ofstream&);
+ int printSeq(ofstream&, string);
bool testGroups(string file); //used to check if file has group data without reading it.
+ int copy(CountTable*);
bool hasGroupInfo() { return hasGroups; }
int getNumGroups() { return groups.size(); }
vector<string> getNamesOfGroups() { return groups; } //returns group names, if no group info vector is blank.
+ int addGroup(string);
+ int renameSeq(string, string); //used to change name of sequence for use with trees
+ int setAbund(string, string, int); //set abundance number of seqs for that group for that seq
int push_back(string); //add a sequence
int push_back(string, int); //add a sequence
int push_back(string, vector<int>); //add a sequence with group info
+ int remove(string); //remove seq
int get(string); //returns unique sequence index for reading distance matrices like NameAssignment
int size() { return indexNameMap.size(); }
+ vector<string> getGroups(string); //returns vector of groups represented by this sequences
vector<int> getGroupCounts(string); //returns group counts for a seq passed in, if no group info is in file vector is blank. Order is the same as the groups returned by getGroups function.
int getGroupCount(string, string); //returns number of seqs for that group for that seq
int getGroupCount(string); // returns total seqs for that group
- int getNumSeqs(string); //returns total seqs for that seq
+ int getNumSeqs(string); //returns total seqs for that seq, 0 if not found
int getNumSeqs() { return total; } //return total number of seqs
int getNumUniqueSeqs() { return uniques; } //return number of unique/representative seqs
int getGroupIndex(string); //returns index in getGroupCounts vector of specific group
vector<string> getNamesOfSeqs();
+ vector<string> getNamesOfSeqs(string);
int mergeCounts(string, string); //combines counts for 2 seqs, saving under the first name passed in.
ListVector getListVector();
vector<string> DeconvoluteCommand::setParameters(){
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "namecount", "none", "none",false,false); parameters.push_back(pcount);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
string DeconvoluteCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The unique.seqs command reads a fastafile and creates a namesfile.\n";
+ helpString += "The unique.seqs command reads a fastafile and creates a name or count file.\n";
helpString += "It creates a file where the first column is the groupname and the second column is a list of sequence names who have the same sequence. \n";
helpString += "If the sequence is unique the second column will just contain its name. \n";
helpString += "The unique.seqs command parameters are fasta and name. fasta is required, unless there is a valid current fasta file.\n";
else {
if (type == "fasta") { outputFileName = "unique" + m->getExtension(inputName); }
else if (type == "name") { outputFileName = "names"; }
+ else if (type == "count") { outputFileName = "count.table"; }
else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
}
return outputFileName;
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
}
catch(exception& e) {
m->errorOut(e, "DeconvoluteCommand", "DeconvoluteCommand");
vector<string> tempOutNames;
outputTypes["fasta"] = tempOutNames;
outputTypes["name"] = tempOutNames;
+ outputTypes["count"] = tempOutNames;
//if the user changes the input directory command factory will send this info to us in the output parameter
string inputDir = validParameter.validFile(parameters, "inputdir", false);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
if (oldNameMapFName == "not open") { oldNameMapFName = ""; abort = true; }
else if (oldNameMapFName == "not found"){ oldNameMapFName = ""; }
else { m->setNameFile(oldNameMapFName); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { abort = true; countfile = ""; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
- if (oldNameMapFName == "") {
- vector<string> files; files.push_back(inFastaName);
- parser.getNameFile(files);
- }
+ if ((countfile != "") && (oldNameMapFName != "")) { m->mothurOut("When executing a unique.seqs command you must enter ONLY ONE of the following: count or name."); m->mothurOutEndLine(); abort = true; }
+
+
+ if (countfile == "") {
+ if (oldNameMapFName == "") {
+ vector<string> files; files.push_back(inFastaName);
+ parser.getNameFile(files);
+ }
+ }
}
//prepare filenames and open files
string outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("name");
+ string outCountFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("count");
string outFastaFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + getOutputFileNameTag("fasta", inFastaName);
map<string, string> nameMap;
m->readNames(oldNameMapFName, nameMap);
if (oldNameMapFName == outNameFile){ outNameFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique." + getOutputFileNameTag("name"); }
}
+ CountTable ct;
+ if (countfile != "") {
+ ct.readTable(countfile);
+ if (countfile == outCountFile){ outCountFile = outputDir + m->getRootName(m->getSimpleName(inFastaName)) + "unique." + getOutputFileNameTag("count"); }
+ }
if (m->control_pressed) { return 0; }
sequenceStrings[seq.getAligned()] = itNames->second;
nameFileOrder.push_back(seq.getAligned());
}
- }else { sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); }
+ }else if (countfile != "") {
+ ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
+ sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned());
+ }else { sequenceStrings[seq.getAligned()] = seq.getName(); nameFileOrder.push_back(seq.getAligned()); }
}else { //this is a dup
if (oldNameMapFName != "") {
itNames = nameMap.find(seq.getName());
}else {
sequenceStrings[seq.getAligned()] += "," + itNames->second;
}
- }else { sequenceStrings[seq.getAligned()] += "," + seq.getName(); }
+ }else if (countfile != "") {
+ int num = ct.getNumSeqs(seq.getName()); //checks to make sure seq is in table
+ if (num != 0) { //its in the table
+ ct.mergeCounts(itStrings->second, seq.getName()); //merges counts and saves in uniques name
+ }
+ }else { sequenceStrings[seq.getAligned()] += "," + seq.getName(); }
}
count++;
//print new names file
ofstream outNames;
- m->openOutputFile(outNameFile, outNames);
+ if (countfile == "") { m->openOutputFile(outNameFile, outNames); outputNames.push_back(outNameFile); outputTypes["name"].push_back(outNameFile); }
+ else { m->openOutputFile(outCountFile, outNames); ct.printHeaders(outNames); outputTypes["count"].push_back(outCountFile); outputNames.push_back(outCountFile); }
for (int i = 0; i < nameFileOrder.size(); i++) {
- //for (itStrings = sequenceStrings.begin(); itStrings != sequenceStrings.end(); itStrings++) {
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); outNames.close(); m->mothurRemove(outNameFile); return 0; }
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); outNames.close(); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
itStrings = sequenceStrings.find(nameFileOrder[i]);
if (itStrings != sequenceStrings.end()) {
- //get rep name
- int pos = (itStrings->second).find_first_of(',');
-
- if (pos == string::npos) { // only reps itself
- outNames << itStrings->second << '\t' << itStrings->second << endl;
- }else {
- outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
- }
+ if (countfile == "") {
+ //get rep name
+ int pos = (itStrings->second).find_first_of(',');
+
+ if (pos == string::npos) { // only reps itself
+ outNames << itStrings->second << '\t' << itStrings->second << endl;
+ }else {
+ outNames << (itStrings->second).substr(0, pos) << '\t' << itStrings->second << endl;
+ }
+ }else { ct.printSeq(outNames, itStrings->second); }
}else{ m->mothurOut("[ERROR]: mismatch in namefile print."); m->mothurOutEndLine(); m->control_pressed = true; }
}
outNames.close();
- if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); m->mothurRemove(outNameFile); return 0; }
+ if (m->control_pressed) { outputTypes.clear(); m->mothurRemove(outFastaFile); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
- m->mothurOut(outFastaFile); m->mothurOutEndLine();
- m->mothurOut(outNameFile); m->mothurOutEndLine();
- outputNames.push_back(outFastaFile); outputNames.push_back(outNameFile); outputTypes["fasta"].push_back(outFastaFile); outputTypes["name"].push_back(outNameFile);
+ outputNames.push_back(outFastaFile); outputTypes["fasta"].push_back(outFastaFile);
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
m->mothurOutEndLine();
//set fasta file as new current fastafile
if (itTypes != outputTypes.end()) {
if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
}
+
+ itTypes = outputTypes.find("count");
+ if (itTypes != outputTypes.end()) {
+ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
+ }
return 0;
}
#include "command.hpp"
#include "fastamap.h"
+#include "counttable.h"
/* The unique.seqs command reads a fasta file, finds the duplicate sequences and outputs a names file
containing 2 columns. The first being the groupname and the second the list of identical sequence names. */
private:
- string inFastaName, oldNameMapFName, outputDir;
+ string inFastaName, oldNameMapFName, outputDir, countfile;
vector<string> outputNames;
bool abort;
TreeReader* reader = new TreeReader(treefile, "", namefile);
vector<Tree*> T = reader->getTrees();
- map<string, string> nameMap = reader->getNameMap();
+ map<string, string> nameMap;
+ m->readNames(namefile, nameMap);
delete reader;
//print new Tree
T[0]->print(out, nameMap);
out.close();
- delete (T[0]->getTreeMap());
+ delete (T[0]->getCountTable());
for (int i = 0; i < T.size(); i++) { delete T[i]; }
//set phylip file as new current phylipfile
string groupfile = "";
m->setTreeFile(treefile);
Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- treeMap = new TreeMap();
+ ct = new CountTable();
bool mismatch = false;
-
- for (int i = 0; i < m->Treenames.size(); i++) {
- //sanity check - is this a group that is not in the sharedfile?
+
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->Treenames.size(); i++) {
+ nameMap.insert(m->Treenames[i]);
+ //sanity check - is this a group that is not in the sharedfile?
if (designfile == "") {
+ if (i == 0) { gps.insert("Group1"); }
if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) {
m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine();
mismatch = true;
}
- treeMap->addSeq(m->Treenames[i], "Group1");
+ groupMap[m->Treenames[i]] = "Group1";
}else{
vector<string> myGroups; myGroups.push_back(m->Treenames[i]);
vector<string> myNames = designMap->getNamesSeqs(myGroups);
mismatch = true;
}
}
- treeMap->addSeq(m->Treenames[i], "Group1");
+ groupMap[m->Treenames[i]] = "Group1";
}
- }
+ }
+ ct->createTable(nameMap, groupMap, gps);
if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; }
if (designfile != "") { delete designMap; }
if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
- delete treeMap;
+ delete ct;
return 0;
}
read = new ReadNewickTree(treefile);
- int readOk = read->read(treeMap);
+ int readOk = read->read(ct);
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; }
+ if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete ct; delete read; return 0; }
vector<Tree*> T = read->getTrees();
if (designfile != "") { delete designMap; }
if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
- for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0;
+ for (int i = 0; i < T.size(); i++) { delete T[i]; } delete ct; return 0;
}
- map<string, string> nameMap;
- T[0]->assembleTree(nameMap);
+ T[0]->assembleTree();
/***************************************************/
// create ouptut tree - respecting pickedGroups //
/***************************************************/
- Tree* outputTree = new Tree(m->getNumGroups(), treeMap);
+ Tree* outputTree = new Tree(m->getNumGroups(), ct);
outputTree->getSubTree(T[0], m->getGroups());
- outputTree->assembleTree(nameMap);
+ outputTree->assembleTree();
//no longer need original tree, we have output tree to use and label
for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (designfile != "") { delete designMap; }
if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } }
else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } }
- delete outputTree; delete treeMap; return 0;
+ delete outputTree; delete ct; return 0;
}
/***************************************************/
// get indicator species values //
/***************************************************/
GetIndicatorSpecies(outputTree);
- delete outputTree; delete treeMap;
+ delete outputTree; delete ct;
}else { //run with design file only
//get indicator species
#include "command.hpp"
#include "readtree.h"
-#include "treemap.h"
+#include "counttable.h"
#include "sharedrabundvector.h"
#include "sharedrabundfloatvector.h"
#include "inputdata.h"
private:
ReadTree* read;
- TreeMap* treeMap;
+ CountTable* ct;
GroupMap* designMap;
string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir, designfile;
bool abort;
try {
processors = p;
outputDir = o;
- TreeMap* tmap = t->getTreeMap();
+ CountTable* ct = t->getCountTable();
//if the users enters no groups then give them the score of all groups
vector<string> mGroups = m->getGroups();
vector<string> groups;
if (numGroups == 0) {
//get score for all users groups
- vector<string> tGroups = tmap->getNamesOfGroups();
+ vector<string> tGroups = ct->getNamesOfGroups();
for (int i = 0; i < tGroups.size(); i++) {
if (tGroups[i] != "xxx") {
groups.push_back(tGroups[i]);
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
}else{
lines.clear();
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos, tmap);
+ data = createProcesses(t, namesOfGroupCombos, ct);
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
}
}
/**************************************************************************************************/
-EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
+EstOutput Parsimony::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) {
try {
EstOutput results; results.resize(num);
- Tree* copyTree = new Tree(tmap);
+ Tree* copyTree = new Tree(ct);
int count = 0;
for (int h = start; h < (start+num); h++) {
*/
#include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
/***********************************************************************/
int processors;
string outputDir;
- EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
- EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
};
/***********************************************************************/
vector<string> ParsimonyCommand::setParameters(){
try {
CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter prandom("random", "String", "", "", "", "", "",false,false); parameters.push_back(prandom);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
string ParsimonyCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The parsimony command parameters are tree, group, name, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
+ helpString += "The parsimony command parameters are tree, group, name, count, random, groups, processors and iters. tree parameter is required unless you have valid current tree file or are using random.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
helpString += "The parsimony command should be in the following format: parsimony(random=yourOutputFilename, groups=yourGroups, iters=yourIters).\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
}
//if the user changes the output directory command factory will send this info to us in the output parameter
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
- }
+ if (countfile=="") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+ }
}
m->setTreeFile(treefile);
- TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ TreeReader* reader;
+ if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+ else { reader = new TreeReader(treefile, countfile); }
T = reader->getTrees();
- tmap = T[0]->getTreeMap();
+ ct = T[0]->getCountTable();
delete reader;
if(outputDir == "") { outputDir += m->hasPath(treefile); }
//set users groups to analyze
SharedUtil util;
vector<string> mGroups = m->getGroups();
- vector<string> tGroups = tmap->getNamesOfGroups();
+ vector<string> tGroups = ct->getNamesOfGroups();
util.setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze
util.getCombos(groupComb, mGroups, numComp);
m->setGroups(mGroups);
if (m->control_pressed) {
delete reading; delete output;
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
m->clearGroups();
if (m->control_pressed) {
delete reading; delete output;
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
m->clearGroups();
for (int j = 0; j < iters; j++) {
//create new tree with same num nodes and leaves as users
- randT = new Tree(tmap);
+ randT = new Tree(ct);
//create random relationships between nodes
randT->assembleRandomTree();
delete reading; delete output; delete randT;
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
m->clearGroups();
return 0;
}
for (int j = 0; j < iters; j++) {
//create new tree with same num nodes and leaves as users
- randT = new Tree(tmap);
+ randT = new Tree(ct);
//create random relationships between nodes
randT->assembleRandomTree();
if (m->control_pressed) {
- delete reading; delete output; delete randT; delete tmap;
+ delete reading; delete output; delete randT; delete ct;
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
}
randomData = pars.getValues(randT, processors, outputDir);
if (m->control_pressed) {
- delete reading; delete output; delete randT; delete tmap;
+ delete reading; delete output; delete randT; delete ct;
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;
}
if (m->control_pressed) {
delete reading; delete output;
- delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (randomtree == "") { outSum.close(); }
for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear();
return 0;
printParsimonyFile();
if (randomtree == "") { printUSummaryFile(); }
- delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }
+ delete output; delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;}
try {
//create treemap
- tmap = new TreeMap();
+ ct = new CountTable();
m->mothurOut("Please enter the number of groups you would like to analyze: ");
cin >> numGroups;
count = 1;
numEachGroup.resize(numGroups, 0);
-
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+
for (int i = 1; i <= numGroups; i++) {
m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": ");
cin >> num;
m->mothurOutJustToLog(toString(num)); m->mothurOutEndLine();
-
- //set tmaps seqsPerGroup
- tmap->seqsPerGroup[toString(i)] = num;
- tmap->addGroup(toString(i));
+ gps.insert(toString(i));
+
//set tmaps namesOfSeqs
for (int j = 0; j < num; j++) {
- tmap->namesOfSeqs.push_back(toString(count));
- tmap->treemap[toString(count)].groupname = toString(i);
+ groupMap[toString(count)] = i;
+ nameMap.insert(toString(count));
count++;
}
}
-
+ ct->createTable(nameMap, groupMap, gps);
+
//clears buffer so next command doesn't have error
string s;
getline(cin, s);
- m->Treenames = tmap->namesOfSeqs;
-
+ m->Treenames = ct->getNamesOfSeqs();
}
catch(exception& e) {
m->errorOut(e, "ParsimonyCommand", "getUserInput");
#include "command.hpp"
#include "parsimony.h"
-#include "treemap.h"
+#include "counttable.h"
#include "progress.hpp"
#include "sharedutilities.h"
#include "fileoutput.h"
vector<Tree*> T; //user trees
Tree* randT; //random tree
Tree* copyUserTree;
- TreeMap* tmap;
- TreeMap* savetmap;
+ CountTable* ct;
+ CountTable* savect;
vector<string> groupComb; // AB. AC, BC...
- string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile;
+ string sumFile, randomtree, allGroups, outputDir, treefile, groupfile, namefile, countfile;
int iters, numGroups, numComp, counter, processors, numUniquesInName;
vector<int> numEachGroup; //vector containing the number of sequences in each group the users wants for random distrib.
vector< vector<float> > userTreeScores; //scores for users trees for each comb.
try {
CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pfreq("freq", "Number", "", "100", "", "", "",false,false); parameters.push_back(pfreq);
string PhyloDiversityCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The phylo.diversity command parameters are tree, group, name, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
+ helpString += "The phylo.diversity command parameters are tree, group, name, count, groups, iters, freq, processors, scale, rarefy, collect and summary. tree and group are required, unless you have valid current files.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. The group names are separated by dashes. By default all groups are used.\n";
helpString += "The iters parameter allows you to specify the number of randomizations to preform, by default iters=1000, if you set rarefy to true.\n";
helpString += "The freq parameter is used indicate when to output your data, by default it is set to 100. But you can set it to a percentage of the number of sequence. For example freq=0.10, means 10%. \n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
string temp;
if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
- }
+ if (countfile=="") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+ }
}
}
int start = time(NULL);
m->setTreeFile(treefile);
- TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ TreeReader* reader;
+ if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+ else { reader = new TreeReader(treefile, countfile); }
vector<Tree*> trees = reader->getTrees();
- tmap = trees[0]->getTreeMap();
+ ct = trees[0]->getCountTable();
delete reader;
SharedUtil util;
vector<string> mGroups = m->getGroups();
- vector<string> tGroups = tmap->getNamesOfGroups();
+ vector<string> tGroups = ct->getNamesOfGroups();
util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
//incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed
//for each of the users trees
for(int i = 0; i < trees.size(); i++) {
- if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
ofstream outSum, outRare, outCollect;
string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + "." + getOutputFileNameTag("summary");
//find largest group total
int largestGroup = 0;
- for (int j = 0; j < mGroups.size(); j++) {
- if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
+ for (int j = 0; j < mGroups.size(); j++) {
+ int numSeqsThisGroup = ct->getGroupCount(mGroups[j]);
+ if (numSeqsThisGroup > largestGroup) { largestGroup = numSeqsThisGroup; }
//initialize diversity
- diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); //numSampled
+ diversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0); //numSampled
//groupA 0.0 0.0
//initialize sumDiversity
- sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
+ sumDiversity[mGroups[j]].resize(numSeqsThisGroup+1, 0.0);
}
//convert freq percentage to number
map<string, bool> done;
//initialize root for all groups to -1
- for (int k = 0; k < (t->getTreeMap())->getNamesOfGroups().size(); k++) { done[(t->getTreeMap())->getNamesOfGroups()[k]] = false; }
+ for (int k = 0; k < (t->getCountTable())->getNamesOfGroups().size(); k++) { done[(t->getCountTable())->getNamesOfGroups()[k]] = false; }
for (int i = 0; i < t->getNumLeaves(); i++) {
*/
#include "command.hpp"
-#include "treemap.h"
+#include "counttable.h"
#include "sharedutilities.h"
#include "tree.h"
int execute();
void help() { m->mothurOut(getHelpString()); }
private:
- TreeMap* tmap;
+ CountTable* ct;
float freq;
int iters, processors, numUniquesInName;
bool abort, rarefy, summary, collect, scale;
- string groups, outputDir, treefile, groupfile, namefile;
+ string groups, outputDir, treefile, groupfile, namefile, countfile;
vector<string> Groups, outputNames; //holds groups to be used, and outputFile names
map<string, int> getRootForGroups(Tree* t);
*/
#include "phylosummary.h"
-
/**************************************************************************************************/
-PhyloSummary::PhyloSummary(string refTfile, string groupFile){
+PhyloSummary::PhyloSummary(string refTfile, CountTable* c){
try {
m = MothurOut::getInstance();
maxLevel = 0;
ignore = false;
+ numSeqs = 0;
+
+ ct = c;
+ groupmap = NULL;
+
+ //check for necessary files
+ string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
+ ifstream FileTest(taxFileNameTest.c_str());
- if (groupFile != "") {
- groupmap = new GroupMap(groupFile);
- groupmap->readMap();
+ if (!FileTest) {
+ m->mothurOut("Error: can't find " + taxFileNameTest + "."); m->mothurOutEndLine(); exit(1);
}else{
- groupmap = NULL;
+ readTreeStruct(FileTest);
}
+
+ tree[0].rank = "0";
+ assignRank(0);
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "PhyloSummary");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+PhyloSummary::PhyloSummary(CountTable* c){
+ try {
+ m = MothurOut::getInstance();
+ maxLevel = 0;
+ ignore = true;
+ numSeqs = 0;
+
+ ct = c;
+ groupmap = NULL;
+
+ tree.push_back(rawTaxNode("Root"));
+ tree[0].rank = "0";
+ }
+ catch(exception& e) {
+ m->errorOut(e, "PhyloSummary", "PhyloSummary");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+PhyloSummary::PhyloSummary(string refTfile, GroupMap* g){
+ try {
+ m = MothurOut::getInstance();
+ maxLevel = 0;
+ ignore = false;
+ numSeqs = 0;
+
+ groupmap = g;
+ ct = NULL;
//check for necessary files
string taxFileNameTest = m->getFullPathName((refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum"));
/**************************************************************************************************/
-PhyloSummary::PhyloSummary(string groupFile){
+PhyloSummary::PhyloSummary(GroupMap* g){
try {
m = MothurOut::getInstance();
maxLevel = 0;
ignore = true;
+ numSeqs = 0;
- if (groupFile != "") {
- groupmap = new GroupMap(groupFile);
- groupmap->readMap();
- }else{
- groupmap = NULL;
- }
+ groupmap = g;
+ ct = NULL;
tree.push_back(rawTaxNode("Root"));
tree[0].rank = "0";
-
-
}
catch(exception& e) {
m->errorOut(e, "PhyloSummary", "PhyloSummary");
for (map<string, string>::iterator itTemp = temp.begin(); itTemp != temp.end();) {
addSeqToTree(itTemp->first, itTemp->second);
- numSeqs++;
temp.erase(itTemp++);
}
childPointer = tree[currentNode].children.find(taxon);
if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on
- if (groupmap != NULL) {
+ int thisCount = 1;
+
+ if (groupmap != NULL) {
//find out the sequences group
string group = groupmap->getGroup(seqName);
if (itGroup != tree[childPointer->second].groupCount.end()) {
tree[childPointer->second].groupCount[group]++;
}
- }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ vector<int> groupCounts = ct->getGroupCounts(seqName);
+ vector<string> groups = ct->getNamesOfGroups();
+ for (int i = 0; i < groups.size(); i++) {
+
+ if (groupCounts[i] != 0) {
+ //do you have a count for this group?
+ map<string, int>::iterator itGroup = tree[childPointer->second].groupCount.find(groups[i]);
+
+ //if yes, increment it - there should not be a case where we can't find it since we load group in read
+ if (itGroup != tree[childPointer->second].groupCount.end()) {
+ tree[childPointer->second].groupCount[groups[i]] += groupCounts[i];
+ }
+ }
+ }
+ }
+ thisCount = ct->getNumSeqs(seqName);
+ }
- tree[childPointer->second].total++;
+ tree[childPointer->second].total += thisCount;
currentNode = childPointer->second;
}else{
tree[index].parent = currentNode;
tree[index].level = (level+1);
- tree[index].total = 1;
tree[currentNode].children[taxon] = index;
+ int thisCount = 1;
//initialize groupcounts
if (groupmap != NULL) {
//if yes, increment it - there should not be a case where we can't find it since we load group in read
if (itGroup != tree[index].groupCount.end()) {
tree[index].groupCount[group]++;
- }
- }
+ }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ tree[index].groupCount[mGroups[j]] = 0;
+ }
+ vector<int> groupCounts = ct->getGroupCounts(seqName);
+ vector<string> groups = ct->getNamesOfGroups();
+
+ for (int i = 0; i < groups.size(); i++) {
+ if (groupCounts[i] != 0) {
+
+ //do you have a count for this group?
+ map<string, int>::iterator itGroup = tree[index].groupCount.find(groups[i]);
+
+ //if yes, increment it - there should not be a case where we can't find it since we load group in read
+ if (itGroup != tree[index].groupCount.end()) {
+ tree[index].groupCount[groups[i]]+=groupCounts[i];
+ }
+ }
+ }
+ }
+ thisCount = ct->getNumSeqs(seqName);
+ }
+ tree[index].total = thisCount;
currentNode = index;
}else{ //otherwise, error
}
/**************************************************************************************************/
-int PhyloSummary::addSeqToTree(string seqTaxonomy, vector<string> names){
+int PhyloSummary::addSeqToTree(string seqTaxonomy, map<string, bool> containsGroup){
try {
numSeqs++;
childPointer = tree[currentNode].children.find(taxon);
if(childPointer != tree[currentNode].children.end()){ //if the node already exists, update count and move on
- if (groupmap != NULL) {
-
- map<string, bool> containsGroup;
- vector<string> mGroups = groupmap->getNamesOfGroups();
- for (int j = 0; j < mGroups.size(); j++) {
- containsGroup[mGroups[j]] = false;
- }
-
- for (int k = 0; k < names.size(); k++) {
- //find out the sequences group
- string group = groupmap->getGroup(names[k]);
-
- if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
- else {
- containsGroup[group] = true;
- }
- }
+ for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+ if (itGroup->second == true) {
+ tree[childPointer->second].groupCount[itGroup->first]++;
+ }
+ }
- for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
- if (itGroup->second == true) {
- tree[childPointer->second].groupCount[itGroup->first]++;
- }
- }
-
- }
-
tree[childPointer->second].total++;
currentNode = childPointer->second;
tree[index].level = (level+1);
tree[index].total = 1;
tree[currentNode].children[taxon] = index;
-
- //initialize groupcounts
- if (groupmap != NULL) {
- map<string, bool> containsGroup;
- vector<string> mGroups = groupmap->getNamesOfGroups();
- for (int j = 0; j < mGroups.size(); j++) {
- tree[index].groupCount[mGroups[j]] = 0;
- containsGroup[mGroups[j]] = false;
- }
-
- for (int k = 0; k < names.size(); k++) {
- //find out the sequences group
- string group = groupmap->getGroup(names[k]);
-
- if (group == "not found") { m->mothurOut("[WARNING]: " + names[k] + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine(); }
- else {
- containsGroup[group] = true;
- }
- }
-
- for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
- if (itGroup->second == true) {
- tree[index].groupCount[itGroup->first]++;
- }
- }
- }
+ for (map<string, bool>::iterator itGroup = containsGroup.begin(); itGroup != containsGroup.end(); itGroup++) {
+ if (itGroup->second == true) {
+ tree[index].groupCount[itGroup->first]++;
+ }
+ }
currentNode = index;
try {
if (ignore) { assignRank(0); }
-
+ vector<string> mGroups;
//print labels
out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
if (groupmap != NULL) {
//so the labels match the counts below, since the map sorts them automatically...
//sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
- vector<string> mGroups = groupmap->getNamesOfGroups();
+ mGroups = groupmap->getNamesOfGroups();
for (int i = 0; i < mGroups.size(); i++) {
out << mGroups[i] << '\t';
}
- }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ mGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) {
+ out << mGroups[i] << '\t';
+ }
+ }
+ }
out << endl;
tree[0].total += tree[it->second].total;
if (groupmap != NULL) {
- vector<string> mGroups = groupmap->getNamesOfGroups();
for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; }
- }
+ }else if ( ct != NULL) {
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { tree[0].groupCount[mGroups[i]] += tree[it->second].groupCount[mGroups[i]]; } }
+ }
}
}
if (groupmap != NULL) {
- //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
- // out << itGroup->second << '\t';
- //}
- vector<string> mGroups = groupmap->getNamesOfGroups();
- for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; }
- }
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; }
+ }else if ( ct != NULL) {
+ if (ct->hasGroupInfo()) { for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } }
+ }
out << endl;
//print rest
//}
vector<string> mGroups = groupmap->getNamesOfGroups();
for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
- }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ vector<string> mGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; }
+ }
+ }
out << endl;
}
for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) {
tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0;
}
- }
+ }else if (ct != NULL) {
+ if (ct->hasGroupInfo()) {
+ for (int j = 0; j < (ct->getNamesOfGroups()).size(); j++) {
+ tree[i].groupCount[(ct->getNamesOfGroups())[j]] = 0;
+ }
+ }
+ }
tree[i].total = 0;
#include "mothur.h"
#include "mothurout.h"
#include "groupmap.h"
+#include "counttable.h"
/**************************************************************************************************/
class PhyloSummary {
public:
- PhyloSummary(string);
- PhyloSummary(string, string);
- ~PhyloSummary() { if (groupmap != NULL) { delete groupmap; } }
+ PhyloSummary(GroupMap*);
+ PhyloSummary(string, GroupMap*);
+ PhyloSummary(CountTable*);
+ PhyloSummary(string, CountTable*);
+ ~PhyloSummary() {}
int summarize(string); //pass it a taxonomy file and a group file and it makes the tree
int addSeqToTree(string, string);
- int addSeqToTree(string, vector<string>);
+ int addSeqToTree(string, map<string, bool>);
void print(ofstream&);
int getMaxLevel() { return maxLevel; }
void assignRank(int);
void readTreeStruct(ifstream&);
GroupMap* groupmap;
+ CountTable* ct;
bool ignore;
int numNodes;
}
}
/***********************************************************************/
-int ReadTree::AssembleTrees(map<string, string> nameMap) {
+int ReadTree::AssembleTrees() {
try {
//assemble users trees
for (int i = 0; i < Trees.size(); i++) {
if (m->control_pressed) { return 0; }
- Trees[i]->assembleTree(nameMap);
+ Trees[i]->assembleTree();
}
return 0;
}
/***********************************************************************/
//This class reads a file in Newick form and stores it in a tree.
-int ReadNewickTree::read(TreeMap* tmap) {
+int ReadNewickTree::read(CountTable* ct) {
try {
holder = "";
int c, error;
}
//make new tree
- T = new Tree(tmap);
+ T = new Tree(ct);
numNodes = T->getNumNodes();
numLeaves = T->getNumLeaves();
- error = readTreeString(tmap);
+ error = readTreeString(ct);
//save trees for later commands
Trees.push_back(T);
//if you are a nexus file
}else if ((c = filehandle.peek()) == '#') {
//get right number of seqs from nexus file.
- Tree* temp = new Tree(tmap); delete temp;
+ Tree* temp = new Tree(ct); delete temp;
- nexusTranslation(tmap); //reads file through the translation and updates treemap
+ nexusTranslation(ct); //reads file through the translation and updates treemap
while((c = filehandle.peek()) != EOF) {
// get past comments
while ((c = filehandle.peek()) != EOF) {
filehandle.putback(c); //put back first ( of tree.
//make new tree
- T = new Tree(tmap);
+ T = new Tree(ct);
numNodes = T->getNumNodes();
numLeaves = T->getNumLeaves();
//read tree info
- error = readTreeString(tmap);
+ error = readTreeString(ct);
//save trees for later commands
Trees.push_back(T);
}
/**************************************************************************************************/
//This function read the file through the translation of the sequences names and updates treemap.
-string ReadNewickTree::nexusTranslation(TreeMap* tmap) {
+string ReadNewickTree::nexusTranslation(CountTable* ct) {
try {
holder = "";
filehandle >> holder;
if(holder == "tree" && comment != 1){return holder;}
}
-
- //update treemap
- tmap->namesOfSeqs.clear();
-
- /*char c;
- string number, name;
- while ((c = filehandle.peek()) != EOF) {
-
- filehandle >> number;
-
- if ((number == "tree") || (number == ";") ) { name = number; break; }
-
- filehandle >> name;
-
- char lastChar;
- if (name.length() != 0) { lastChar = name[name.length()-1]; }
-
- if ((name == "tree") || (name == ";") ) { break; }
-
- if (lastChar == ',') { name.erase(name.end()-1); } //erase the comma
- */
-
+
string number, name;
for(int i=0;i<numSeqs;i++){
filehandle >> number;
filehandle >> name;
name.erase(name.end()-1); //erase the comma
-
- //insert new one with new name
- string group = tmap->getGroup(name);
- tmap->treemap[toString(number)].groupname = group;
- tmap->treemap[toString(number)].vectorIndex = tmap->getIndex(name);
- //erase old one. so treemap[sarah].groupnumber is now treemap[1].groupnumber. if number is 1 and name is sarah.
- tmap->treemap.erase(name);
- tmap->namesOfSeqs.push_back(number);
+ ct->renameSeq(name, toString(number));
}
return name;
}
/**************************************************************************************************/
-int ReadNewickTree::readTreeString(TreeMap* tmap) {
+int ReadNewickTree::readTreeString(CountTable* ct) {
try {
int n = 0;
if(ch == '('){
n = numLeaves; //number of leaves / sequences, we want node 1 to start where the leaves left off
- lc = readNewickInt(filehandle, n, T, tmap);
+ lc = readNewickInt(filehandle, n, T, ct);
if (lc == -1) { m->mothurOut("error with lc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
if(filehandle.peek()==','){
}
if(rooted != 1){
- rc = readNewickInt(filehandle, n, T, tmap);
+ rc = readNewickInt(filehandle, n, T, ct);
if (rc == -1) { m->mothurOut("error with rc"); m->mothurOutEndLine(); return -1; } //reports an error in reading
if(filehandle.peek() == ')'){
readSpecialChar(filehandle,')',"right parenthesis");
}
/**************************************************************************************************/
-int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) {
+int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, CountTable* ct) {
try {
if (m->control_pressed) { return -1; }
//read all children
vector<int> childrenNodes;
while(f.peek() != ')'){
- int child = readNewickInt(f, n, T, tmap);
+ int child = readNewickInt(f, n, T, ct);
if (child == -1) { return -1; } //reports an error in reading
//cout << "child = " << child << endl;
childrenNodes.push_back(child);
}else{
T->tree[n].setBranchLength(0.0);
}
-
- //T->tree[n].setChildren(lc,rc);
- //T->tree[lc].setParent(n);
- //T->tree[rc].setParent(n);
- //T->printTree(); cout << endl;
-
+
return n++;
}else{
f.putback(d);
//set group info
- string group = tmap->getGroup(name);
+ vector<string> group = ct->getGroups(name);
//find index in tree of name
int n1 = T->getIndex(name);
//adds sequence names that are not in group file to the "xxx" group
- if(group == "not found") {
+ if(group.size() == 0) {
m->mothurOut("Name: " + name + " is not in your groupfile, and will be disregarded. \n"); //readOk = -1; return n1;
- tmap->namesOfSeqs.push_back(name);
- tmap->treemap[name].groupname = "xxx";
-
- map<string, int>::iterator it;
- it = tmap->seqsPerGroup.find("xxx");
- if (it == tmap->seqsPerGroup.end()) { //its a new group
- tmap->addGroup("xxx");
- tmap->seqsPerGroup["xxx"] = 1;
- }else {
- tmap->seqsPerGroup["xxx"]++;
- }
-
- group = "xxx";
- }
-
- vector<string> tempGroup; tempGroup.push_back(group);
-
- T->tree[n1].setGroup(tempGroup);
+ vector<string> currentGroups = ct->getNamesOfGroups();
+ if (!m->inUsersGroups("xxx", currentGroups)) { ct->addGroup("xxx"); }
+ currentGroups = ct->getNamesOfGroups();
+ vector<int> thisCounts; thisCounts.resize(currentGroups.size(), 0);
+ for (int h = 0; h < currentGroups.size(); h++) {
+ if (currentGroups[h] == "xxx") { thisCounts[h] = 1; break; }
+ }
+ ct->push_back(name, thisCounts);
+
+ group.push_back("xxx");
+ }
+ T->tree[n1].setGroup(group);
T->tree[n1].setChildren(-1,-1);
if(blen == 1){
#include "mothur.h"
#include "tree.h"
+#include "counttable.h"
#define MAX_LINE 513
#define SKIPLINE(f,c) {while((c=f.get())!=EOF && ((c) != '\n')){}}
ReadTree();
virtual ~ReadTree() {};
- virtual int read(TreeMap*) = 0;
+ virtual int read(CountTable*) = 0;
int readSpecialChar(istream&, char, string);
int readNodeChar(istream& f);
float readBranchLength(istream& f);
vector<Tree*> getTrees() { return Trees; }
- int AssembleTrees(map<string, string>);
+ int AssembleTrees();
protected:
vector<Tree*> Trees;
- TreeMap* treeMap;
+ CountTable* ct;
int numNodes, numLeaves;
MothurOut* m;
public:
ReadNewickTree(string file) : treeFile(file) { m->openInputFile(file, filehandle); readOk = 0; }
~ReadNewickTree() {};
- int read(TreeMap*);
+ int read(CountTable*);
private:
Tree* T;
- int readNewickInt(istream&, int&, Tree*, TreeMap*);
- int readTreeString(TreeMap*);
- string nexusTranslation(TreeMap*);
+ int readNewickInt(istream&, int&, Tree*, CountTable*);
+ int readTreeString(CountTable*);
+ string nexusTranslation(CountTable*);
ifstream filehandle;
string treeFile;
string holder;
try {
CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
CommandParameter pmap("map", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pmap);
- CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
+ CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
string AlignCheckCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The align.check command reads a fasta file and map file.\n";
+ helpString += "The align.check command reads a fasta file and map file as well as an optional name file.\n";
helpString += "It outputs a file containing the secondary structure matches in the .align.check file.\n";
helpString += "The align.check command parameters are fasta and map, both are required.\n";
helpString += "The align.check command should be in the following format: align.check(fasta=yourFasta, map=yourMap).\n";
try {
CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist);
CommandParameter pphylip("phylip", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pphylip);
- //CommandParameter pname("name", "InputTypes", "", "", "none", "none", "ColumnName",false,false); parameters.push_back(pname);
CommandParameter pcolumn("column", "InputTypes", "", "", "PhylipColumn", "PhylipColumn", "none",false,false); parameters.push_back(pcolumn);
CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
CommandParameter pcutoff("cutoff", "Number", "", "-1.00", "", "", "",false,false); parameters.push_back(pcutoff);
path = m->hasPath(it->second);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["column"] = inputDir + it->second; }
- }
-
- //it = parameters.find("name");
- //user has given a template file
- //if(it != parameters.end()){
- //path = m->hasPath(it->second);
- //if the user has not given a path then, add inputdir. else leave path alone.
- //if (path == "") { parameters["name"] = inputDir + it->second; }
- //}
-
+ }
}
//check for required parameters
listFile = validParameter.validFile(parameters, "list", true);
else if(!m->isTrue(temp)) { hard = 0; }
else if(m->isTrue(temp)) { hard = 1; }
-// temp = validParameter.validFile(parameters, "name", true);
-// if (temp == "not found") { nameFile = ""; }
-// else if(temp == "not open") { abort = true; }
-// else { nameFile = temp; }
-// cout << "name:\t" << nameFile << endl;
-
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "-1.00"; }
m->mothurConvert(temp, cutoff);
// cout << cutoff << endl;
int ShhherCommand::execute(){
try {
- if (abort == true) { return 0; }
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
getSingleLookUp(); if (m->control_pressed) { return 0; }
getJointLookUp(); if (m->control_pressed) { return 0; }
#include "subsample.h"
//**********************************************************************************************************************
-Tree* SubSample::getSample(Tree* T, TreeMap* tmap, TreeMap* newTmap, int size, map<string, string> originalNameMap) {
+Tree* SubSample::getSample(Tree* T, CountTable* ct, CountTable* newCt, int size) {
try {
Tree* newTree = NULL;
- map<string, vector<string> > newGroups;
- vector<string> subsampledSeqs = getSample(tmap, size, newGroups);
+ //remove seqs not in sample from counttable
+ vector<string> Groups = ct->getNamesOfGroups();
+ newCt->copy(ct);
+ newCt->addGroup("doNotIncludeMe");
- //remove seqs not in sample from treemap
- for (map<string, vector<string> >::iterator it = newGroups.begin(); it != newGroups.end(); it++) {
- for (int i = 0; i < (it->second).size(); i++) {
- newTmap->addSeq((it->second)[i], it->first);
- }
- }
-
- newTree = new Tree(newTmap);
- newTree->getCopy(T, originalNameMap);
-
- return newTree;
- }
- catch(exception& e) {
- m->errorOut(e, "SubSample", "getSample-Tree");
- exit(1);
- }
-}
-/**********************************************************************************************************************
-Tree* SubSample::getSample(Tree* T, TreeMap* tmap, map<string, string> whole, int size) {
- try {
- Tree* newTree = NULL;
-
- vector<string> subsampledSeqs = getSample(tmap, size);
- map<string, string> sampledNameMap = deconvolute(whole, subsampledSeqs);
+ map<string, int> doNotIncludeTotals;
+ vector<string> namesSeqs = ct->getNamesOfSeqs();
+ for (int i = 0; i < namesSeqs.size(); i++) { doNotIncludeTotals[namesSeqs[i]] = 0; }
+
+ for (int i = 0; i < Groups.size(); i++) {
+ if (m->inUsersGroups(Groups[i], m->getGroups())) {
+ if (m->control_pressed) { break; }
- //remove seqs not in sample from treemap
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the subsample?
- int count = 0;
- for (int j = 0; j < subsampledSeqs.size(); j++) {
- if (tmap->namesOfSeqs[i] == subsampledSeqs[j]) { break; } //found it
- count++;
+ int thisSize = ct->getGroupCount(Groups[i]);
+
+ if (thisSize >= size) {
+
+ vector<string> names = ct->getNamesOfSeqs(Groups[i]);
+ vector<int> random;
+ for (int j = 0; j < names.size(); j++) {
+ int num = ct->getGroupCount(names[j], Groups[i]);
+ for (int k = 0; k < num; k++) { random.push_back(j); }
+ }
+ random_shuffle(random.begin(), random.end());
+
+ vector<int> sampleRandoms; sampleRandoms.resize(names.size(), 0);
+ for (int j = 0; j < size; j++) { sampleRandoms[random[j]]++; }
+ for (int j = 0; j < sampleRandoms.size(); j++) {
+ newCt->setAbund(names[j], Groups[i], sampleRandoms[j]);
+ }
+ sampleRandoms.clear(); sampleRandoms.resize(names.size(), 0);
+ for (int j = size; j < thisSize; j++) { sampleRandoms[random[j]]++; }
+ for (int j = 0; j < sampleRandoms.size(); j++) { doNotIncludeTotals[names[j]] += sampleRandoms[j]; }
+ }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
}
- if (m->control_pressed) { return newTree; }
-
- //if you didnt find it, remove it
- if (count == subsampledSeqs.size()) {
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
}
- //create new tree
- int numUniques = sampledNameMap.size();
- if (sampledNameMap.size() == 0) { numUniques = subsampledSeqs.size(); }
+ for (map<string, int>::iterator it = doNotIncludeTotals.begin(); it != doNotIncludeTotals.end(); it++) {
+ newCt->setAbund(it->first, "doNotIncludeMe", it->second);
+ }
- newTree = new Tree(numUniques, tmap); //numNodes, treemap
- newTree->getSubTree(T, subsampledSeqs, sampledNameMap);
+ newTree = new Tree(newCt);
+ newTree->getCopy(T, true);
return newTree;
}
m->errorOut(e, "SubSample", "getSample-Tree");
exit(1);
}
-}*/
+}
//**********************************************************************************************************************
//assumes whole maps dupName -> uniqueName
map<string, string> SubSample::deconvolute(map<string, string> whole, vector<string>& wanted) {
}
}
//**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample) {
- try {
- vector<string> temp2;
- sample["doNotIncludeMe"] = temp2;
-
- vector<string> namesInSample;
-
- vector<string> Groups = tMap->getNamesOfGroups();
- for (int i = 0; i < Groups.size(); i++) {
-
- if (m->inUsersGroups(Groups[i], m->getGroups())) {
- if (m->control_pressed) { break; }
-
- vector<string> thisGroup; thisGroup.push_back(Groups[i]);
- vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
- vector<string> temp;
- sample[Groups[i]] = temp;
-
- if (thisSize >= size) {
-
- random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
-
- for (int j = 0; j < size; j++) { sample[Groups[i]].push_back(thisGroupsSeqs[j]); namesInSample.push_back(thisGroupsSeqs[j]); }
- for (int j = size; j < thisSize; j++) { sample["doNotIncludeMe"].push_back(thisGroupsSeqs[j]); }
-
- }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
- }
- }
-
- return namesInSample;
- }
- catch(exception& e) {
- m->errorOut(e, "SubSample", "getSample-TreeMap");
- exit(1);
- }
-}
-
-//**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, int size) {
- try {
- vector<string> sample;
-
- vector<string> Groups = tMap->getNamesOfGroups();
- for (int i = 0; i < Groups.size(); i++) {
-
- if (m->inUsersGroups(Groups[i], m->getGroups())) {
- if (m->control_pressed) { break; }
-
- vector<string> thisGroup; thisGroup.push_back(Groups[i]);
- vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
-
- if (thisSize >= size) {
-
- random_shuffle(thisGroupsSeqs.begin(), thisGroupsSeqs.end());
-
- for (int j = 0; j < size; j++) { sample.push_back(thisGroupsSeqs[j]); }
- }else { m->mothurOut("[ERROR]: You have selected a size that is larger than "+Groups[i]+" number of sequences.\n"); m->control_pressed = true; }
- }
- }
-
- return sample;
- }
- catch(exception& e) {
- m->errorOut(e, "SubSample", "getSample-TreeMap");
- exit(1);
- }
-}
-//**********************************************************************************************************************
-vector<string> SubSample::getSample(TreeMap* tMap, vector<string> Groups) {
- try {
- vector<string> sample;
-
- //vector<string> Groups = tMap->getNamesOfGroups();
- for (int i = 0; i < Groups.size(); i++) {
-
- if (m->control_pressed) { break; }
-
- vector<string> thisGroup; thisGroup.push_back(Groups[i]);
- vector<string> thisGroupsSeqs = tMap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
-
- for (int j = 0; j < thisSize; j++) { sample.push_back(thisGroupsSeqs[j]); }
- }
-
- return sample;
- }
- catch(exception& e) {
- m->errorOut(e, "SubSample", "getSample-TreeMap");
- exit(1);
- }
-}
-//**********************************************************************************************************************
vector<string> SubSample::getSample(vector<SharedRAbundVector*>& thislookup, int size) {
try {
#include "sharedrabundvector.h"
#include "treemap.h"
#include "tree.h"
+#include "counttable.h"
+
//subsampling overwrites the sharedRabunds. If you need to reuse the original use the getSamplePreserve function.
~SubSample() {}
vector<string> getSample(vector<SharedRAbundVector*>&, int); //returns the bin labels for the subsample, mothurOuts binlabels are preserved so you can run this multiple times. Overwrites original vector passed in, if you need to preserve it deep copy first.
-
- //Tree* getSample(Tree*, TreeMap*, map<string, string>, int); //creates new subsampled tree, destroys treemap so copy if needed.
- Tree* getSample(Tree*, TreeMap*, TreeMap*, int, map<string, string>); //creates new subsampled tree. Uses first treemap to fill new treemap with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
+ Tree* getSample(Tree*, CountTable*, CountTable*, int); //creates new subsampled tree. Uses first counttable to fill new counttable with sabsampled seqs. Sets groups of seqs not in subsample to "doNotIncludeMe".
int getSample(SAbundVector*&, int); //destroys sabundvector passed in, so copy it if you need it
private:
MothurOut* m;
int eliminateZeroOTUS(vector<SharedRAbundVector*>&);
-
- vector<string> getSample(TreeMap*, vector<string>);
- vector<string> getSample(TreeMap*, int); //names of seqs to include in sample tree
- vector<string> getSample(TreeMap* tMap, int size, map<string, vector<string> >& sample); //sample maps group -> seqs in group. seqs not in sample are in doNotIncludeMe group
- map<string, string> deconvolute(map<string, string> wholeSet, vector<string>& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap.
+ map<string, string> deconvolute(map<string, string> wholeSet, vector<string>& subsampleWanted); //returns new nameMap containing only subsampled names, and removes redundants from subsampled wanted because it makes the new nameMap.
};
vector<string> SummaryTaxCommand::setParameters(){
try {
CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptaxonomy);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter preftaxonomy("reftaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(preftaxonomy);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
try {
string helpString = "";
helpString += "The summary.tax command reads a taxonomy file and an optional name file, and summarizes the taxonomy information.\n";
- helpString += "The summary.tax command parameters are taxonomy, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
+ helpString += "The summary.tax command parameters are taxonomy, count, group and name. taxonomy is required, unless you have a valid current taxonomy file.\n";
helpString += "The name parameter allows you to enter a name file associated with your taxonomy file. \n";
helpString += "The group parameter allows you add a group file so you can have the summary totals broken up by group.\n";
+ helpString += "The count parameter allows you add a count file so you can have the summary totals broken up by group.\n";
helpString += "The reftaxonomy parameter allows you give the name of the reference taxonomy file used when you classified your sequences. It is not required, but providing it will keep the rankIDs in the summary file static.\n";
helpString += "The summary.tax command should be in the following format: \n";
helpString += "summary.tax(taxonomy=yourTaxonomyFile) \n";
if (path == "") { parameters["reftaxonomy"] = inputDir + it->second; }
}
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
+
}
//initialize outputTypes
if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
else { m->setGroupFile(groupfile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
refTaxonomy = validParameter.validFile(parameters, "reftaxonomy", true);
if (refTaxonomy == "not found") { refTaxonomy = ""; m->mothurOut("reftaxonomy is not required, but if given will keep the rankIDs in the summary file static."); m->mothurOutEndLine(); }
else if (refTaxonomy == "not open") { refTaxonomy = ""; abort = true; }
outputDir += m->hasPath(taxfile); //if user entered a file with a path then preserve it
}
- if (namefile == "") {
- vector<string> files; files.push_back(taxfile);
- parser.getNameFile(files);
+ if (countfile == "") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(taxfile);
+ parser.getNameFile(files);
+ }
}
-
}
}
catch(exception& e) {
if (abort == true) { if (calledHelp) { return 0; } return 2; }
int start = time(NULL);
- PhyloSummary* taxaSum;
- if (refTaxonomy != "") {
- taxaSum = new PhyloSummary(refTaxonomy, groupfile);
- }else {
- taxaSum = new PhyloSummary(groupfile);
- }
+ GroupMap* groupMap = NULL;
+ CountTable* ct = NULL;
+ if (groupfile != "") {
+ groupMap = new GroupMap(groupfile);
+ groupMap->readMap();
+ }else if (countfile != "") {
+ ct = new CountTable();
+ ct->readTable(countfile);
+ }
- if (m->control_pressed) { delete taxaSum; return 0; }
+ PhyloSummary* taxaSum;
+ if (countfile != "") {
+ if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, ct); }
+ else { taxaSum = new PhyloSummary(ct); }
+ }else {
+ if (refTaxonomy != "") { taxaSum = new PhyloSummary(refTaxonomy, groupMap); }
+ else { taxaSum = new PhyloSummary(groupMap); }
+ }
+
+ if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
int numSeqs = 0;
- if (namefile == "") { numSeqs = taxaSum->summarize(taxfile); }
- else {
+ if ((namefile == "") || (countfile != "")) { numSeqs = taxaSum->summarize(taxfile); }
+ else if (namefile != "") {
map<string, vector<string> > nameMap;
map<string, vector<string> >::iterator itNames;
m->readNames(namefile, nameMap);
- if (m->control_pressed) { delete taxaSum; return 0; }
+ if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
ifstream in;
m->openInputFile(taxfile, in);
string name, taxon;
while(!in.eof()){
+
+ if (m->control_pressed) { break; }
+
in >> name >> taxon; m->gobble(in);
itNames = nameMap.find(name);
in.close();
}
- if (m->control_pressed) { delete taxaSum; return 0; }
+ if (m->control_pressed) { if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; } delete taxaSum; return 0; }
//print summary file
ofstream outTaxTree;
outTaxTree.close();
delete taxaSum;
+ if (groupMap != NULL) { delete groupMap; } if (ct != NULL) { delete ct; }
if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; }
*/
#include "command.hpp"
+#include "counttable.h"
/**************************************************************************************************/
private:
bool abort;
- string taxfile, outputDir, namefile, groupfile, refTaxonomy;
+ string taxfile, outputDir, namefile, groupfile, refTaxonomy, countfile;
vector<string> outputNames;
map<string, int> nameMap;
};
#include "tree.h"
/*****************************************************************/
-Tree::Tree(int num, TreeMap* t) : tmap(t) {
+Tree::Tree(int num, CountTable* t) : ct(t) {
try {
m = MothurOut::getInstance();
}
}
/*****************************************************************/
-Tree::Tree(TreeMap* t) : tmap(t) {
+Tree::Tree(CountTable* t) : ct(t) {
try {
m = MothurOut::getInstance();
if (m->runParse == true) { parseTreeFile(); m->runParse = false; }
-//for(int i = 0; i < globaldata->Treenames.size(); i++) { cout << i << '\t' << globaldata->Treenames[i] << endl; }
+
numLeaves = m->Treenames.size();
numNodes = 2*numLeaves - 1;
tree.resize(numNodes);
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
- }
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
//initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
tree[i].setName(m->Treenames[i]);
//save group info
- string group = tmap->getGroup(m->Treenames[i]);
-
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(m->Treenames[i], i);
-
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(m->Treenames[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set one remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
+
//intialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setName("");
}
}
/*****************************************************************/
-Tree::Tree(TreeMap* t, vector< vector<double> >& sims) : tmap(t) {
+Tree::Tree(CountTable* t, vector< vector<double> >& sims) : ct(t) {
try {
m = MothurOut::getInstance();
tree.resize(numNodes);
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
- }
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
//initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
tree[i].setName(m->Treenames[i]);
//save group info
- string group = tmap->getGroup(m->Treenames[i]);
-
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(m->Treenames[i], i);
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(m->Treenames[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(m->Treenames[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set one remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
//intialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setGroup(tempGroups);
}
}
+
//build tree from matrix
//initialize indexes
- map<int, int> indexes; //maps row in simMatrix to vector index in the tree
- for (int g = 0; g < numLeaves; g++) { indexes[g] = g; }
+ map<int, int> thisIndexes; //maps row in simMatrix to vector index in the tree
+ for (int g = 0; g < numLeaves; g++) { thisIndexes[g] = g; }
//do merges and create tree structure by setting parents and children
//there are numGroups - 1 merges to do
//set non-leaf node info and update leaves to know their parents
//non-leaf
- tree[numLeaves + i].setChildren(indexes[row], indexes[column]);
+ tree[numLeaves + i].setChildren(thisIndexes[row], thisIndexes[column]);
//parents
- tree[indexes[row]].setParent(numLeaves + i);
- tree[indexes[column]].setParent(numLeaves + i);
+ tree[thisIndexes[row]].setParent(numLeaves + i);
+ tree[thisIndexes[column]].setParent(numLeaves + i);
//blength = distance / 2;
float blength = ((1.0 - largest) / 2);
//branchlengths
- tree[indexes[row]].setBranchLength(blength - tree[indexes[row]].getLengthToLeaves());
- tree[indexes[column]].setBranchLength(blength - tree[indexes[column]].getLengthToLeaves());
+ tree[thisIndexes[row]].setBranchLength(blength - tree[thisIndexes[row]].getLengthToLeaves());
+ tree[thisIndexes[column]].setBranchLength(blength - tree[thisIndexes[column]].getLengthToLeaves());
//set your length to leaves to your childs length plus branchlength
- tree[numLeaves + i].setLengthToLeaves(tree[indexes[row]].getLengthToLeaves() + tree[indexes[row]].getBranchLength());
+ tree[numLeaves + i].setLengthToLeaves(tree[thisIndexes[row]].getLengthToLeaves() + tree[thisIndexes[row]].getBranchLength());
//update index
- indexes[row] = numLeaves+i;
- indexes[column] = numLeaves+i;
+ thisIndexes[row] = numLeaves+i;
+ thisIndexes[column] = numLeaves+i;
//remove highest value that caused the merge.
sims[row][column] = -1000.0;
}
/*****************************************************************/
Tree::~Tree() {}
-/*****************************************************************/
+/*****************************************************************
void Tree::addNamesToCounts(map<string, string> nameMap) {
try {
//ex. seq1 seq2,seq3,se4
m->errorOut(e, "Tree", "addNamesToCounts");
exit(1);
}
-}
+}*/
/*****************************************************************/
int Tree::getIndex(string searchName) {
try {
- //Treemap knows name, group and index to speed up search
- // getIndex function will return the vector index or -1 if seq is not found.
- int index = tmap->getIndex(searchName);
- return index;
-
+ map<string, int>::iterator itIndex = indexes.find(searchName);
+ if (itIndex != indexes.end()) {
+ return itIndex->second;
+ }
+ return -1;
}
catch(exception& e) {
m->errorOut(e, "Tree", "getIndex");
void Tree::setIndex(string searchName, int index) {
try {
- //set index in treemap
- tmap->setIndex(searchName, index);
+ map<string, int>::iterator itIndex = indexes.find(searchName);
+ if (itIndex == indexes.end()) {
+ indexes[searchName] = index;
+ }
}
catch(exception& e) {
m->errorOut(e, "Tree", "setIndex");
}
}
/*****************************************************************/
-int Tree::assembleTree(map<string, string> nameMap) {
- try {
- //save for later
- names = nameMap;
-
- //if user has given a names file we want to include that info in the pgroups and pcount info.
- if(nameMap.size() != 0) { addNamesToCounts(nameMap); }
-
+int Tree::assembleTree() {
+ try {
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
if (m->control_pressed) { return 1; }
exit(1);
}
}
-/*****************************************************************
-int Tree::assembleTree(string n) {
- try {
-
- //build the pGroups in non leaf nodes to be used in the parsimony calcs.
- for (int i = numLeaves; i < numNodes; i++) {
- if (m->control_pressed) { return 1; }
-
- tree[i].pGroups = (mergeGroups(i));
- tree[i].pcount = (mergeGcounts(i));
- }
- //float B = clock();
- //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "Tree", "assembleTree");
- exit(1);
- }
-}
/*****************************************************************/
//assumes leaf node names are in groups and no names file - used by indicator command
void Tree::getSubTree(Tree* Ctree, vector<string> Groups) {
try {
//copy Tree since we are going to destroy it
- Tree* copy = new Tree(tmap);
+ Tree* copy = new Tree(ct);
copy->getCopy(Ctree);
- map<string, string> empty;
- copy->assembleTree(empty);
+ copy->assembleTree();
//we want to select some of the leaf nodes to create the output tree
//go through the input Tree starting at parents of leaves
+ //initialize groupNodeInfo
+ vector<string> namesOfGroups = ct->getNamesOfGroups();
+ for (int i = 0; i < namesOfGroups.size(); i++) { groupNodeInfo[namesOfGroups[i]].resize(0); }
+
+ //initialize tree with correct number of nodes, name and group info.
for (int i = 0; i < numNodes; i++) {
-
//initialize leaf nodes
if (i <= (numLeaves-1)) {
tree[i].setName(Groups[i]);
//save group info
- string group = tmap->getGroup(Groups[i]);
- vector<string> tempGroups; tempGroups.push_back(group);
- tree[i].setGroup(tempGroups);
- groupNodeInfo[group].push_back(i);
-
- //set pcount and pGroup for groupname to 1.
- tree[i].pcount[group] = 1;
- tree[i].pGroups[group] = 1;
-
- //Treemap knows name, group and index to speed up search
- tmap->setIndex(Groups[i], i);
-
- //intialize non leaf nodes
+ int maxPars = 1;
+ vector<string> group;
+ vector<int> counts = ct->getGroupCounts(Groups[i]);
+ for (int j = 0; j < namesOfGroups.size(); j++) {
+ if (counts[j] != 0) { //you have seqs from this group
+ groupNodeInfo[namesOfGroups[j]].push_back(i);
+ group.push_back(namesOfGroups[j]);
+ tree[i].pGroups[namesOfGroups[j]] = counts[j];
+ tree[i].pcount[namesOfGroups[j]] = counts[j];
+ //keep highest group
+ if(counts[j] > maxPars){ maxPars = counts[j]; }
+ }
+ }
+ tree[i].setGroup(group);
+ setIndex(Groups[i], i);
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set one remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
+
+ //intialize non leaf nodes
}else if (i > (numLeaves-1)) {
tree[i].setName("");
vector<string> tempGroups;
tree[i].setGroup(tempGroups);
}
}
-
+
set<int> removedLeaves;
for (int i = 0; i < copy->getNumLeaves(); i++) {
exit(1);
}
}
-/*****************************************************************/
+/*****************************************************************
//assumes nameMap contains unique names as key or is empty.
//assumes numLeaves defined in tree constructor equals size of seqsToInclude and seqsToInclude only contains unique seqs.
int Tree::getSubTree(Tree* copy, vector<string> seqsToInclude, map<string, string> nameMap) {
return (index++);
}else { //you are a leaf
- int indexInNewTree = tmap->getIndex(oldtree[node].getName());
+ int indexInNewTree = getIndex(oldtree[node].getName());
return indexInNewTree;
}
}
}
}
/*****************************************************************/
-void Tree::getCopy(Tree* copy, map<string, string> nameMap) {
+void Tree::getCopy(Tree* copy, bool subsample) {
try {
//for each node in the tree copy its info
//copy children
tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
}
-
- if (nameMap.size() != 0) { addNamesToCounts(nameMap); }
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
tree[i].setChildren(copy->tree[i].getLChild(), copy->tree[i].getRChild());
//copy index in node and tmap
+ setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
tree[i].setIndex(copy->tree[i].getIndex());
- setIndex(copy->tree[i].getName(), getIndex(copy->tree[i].getName()));
//copy pGroups
tree[i].pGroups = copy->tree[i].pGroups;
try {
//initialize groupNodeInfo
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0);
+ for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+ groupNodeInfo[(ct->getNamesOfGroups())[i]].resize(0);
}
for(int i = 0; i < numLeaves; i++){
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(vector<string> g) {
randomLabels(g);
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree();
}
/*************************************************************************************************/
void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
randomLabels(temp);
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree();
}
/*************************************************************************************************/
//for now it's just random topology but may become random labels as well later that why this is such a simple function now...
void Tree::assembleRandomTree() {
randomTopology();
- map<string, string> empty;
- assembleTree(empty);
+ assembleTree();
}
/**************************************************************************************************/
}
}
}else { //you are a leaf
- string leafGroup = tmap->getGroup(tree[node].getName());
+ vector<string> leafGroup = ct->getGroups(tree[node].getName());
if (mode == "branch") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a branch length then print it
if (tree[node].getBranchLength() != -1) {
out << ":" << tree[node].getBranchLength();
}
}else if (mode == "boot") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a label then print it
if (tree[node].getLabel() != -1) {
out << tree[node].getLabel();
}
}
}else { //you are a leaf
- string leafGroup = tmap->getGroup(theseNodes[node].getName());
+ vector<string> leafGroup = ct->getGroups(theseNodes[node].getName());
if (mode == "branch") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a branch length then print it
if (theseNodes[node].getBranchLength() != -1) {
out << ":" << theseNodes[node].getBranchLength();
}
}else if (mode == "boot") {
- out << leafGroup;
+ out << leafGroup[0];
//if there is a label then print it
if (theseNodes[node].getLabel() != -1) {
out << theseNodes[node].getLabel();
*/
#include "treenode.h"
-#include "treemap.h"
+#include "counttable.h"
/* This class represents the treefile. */
class Tree {
public:
Tree(string); //do not use tree generated by this constructor its just to extract the treenames, its a chicken before the egg thing that needs to be revisited.
- Tree(int, TreeMap*);
- Tree(TreeMap*); //to generate a tree from a file
- Tree(TreeMap*, vector< vector<double> >&); //create tree from sim matrix
+ Tree(int, CountTable*);
+ Tree(CountTable*); //to generate a tree from a file
+ Tree(CountTable*, vector< vector<double> >&); //create tree from sim matrix
~Tree();
- TreeMap* getTreeMap() { return tmap; }
+ CountTable* getCountTable() { return ct; }
void getCopy(Tree*); //makes tree a copy of the one passed in.
- void getCopy(Tree* copy, map<string, string>); //makes a copy of the tree structure passed in, (just parents, children and br). Used with the Tree(TreeMap*) constructor. Assumes the tmap already has set seqs groups you want. Used by subsample to reassign seqs you don't want included to group "doNotIncludeMe".
+ void getCopy(Tree* copy, bool); //makes a copy of the tree structure passed in, (just parents, children and br). Used with the Tree(TreeMap*) constructor. Assumes the tmap already has set seqs groups you want. Used by subsample to reassign seqs you don't want included to group "doNotIncludeMe".
void getSubTree(Tree*, vector<string>); //makes tree a that contains only the names passed in.
- int getSubTree(Tree* originalToCopy, vector<string> seqToInclude, map<string, string> nameMap); //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided.
+ //int getSubTree(Tree* originalToCopy, vector<string> seqToInclude, map<string, string> nameMap); //used with (int, TreeMap) constructor. SeqsToInclude contains subsample wanted - assumes these are unique seqs and size of vector=numLeaves passed into constructor. nameMap is unique -> redundantList can be empty if no namesfile was provided.
void assembleRandomTree();
void assembleRandomUnifracTree(vector<string>);
int findRoot(); //return index of root node
//this function takes the leaf info and populates the non leaf nodes
- int assembleTree(map<string, string>);
+ int assembleTree();
vector<Node> tree; //the first n nodes are the leaves, where n is the number of sequences.
map< string, vector<int> > groupNodeInfo; //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file.
private:
- TreeMap* tmap;
+ CountTable* ct;
int numNodes, numLeaves;
ofstream out;
string filename;
- map<string, string> names;
+ //map<string, string> names;
map<string, int>::iterator it, it2;
map<string, int> mergeGroups(int); //returns a map with a groupname and the number of times that group was seen in the children
map<string,int> mergeGcounts(int);
+ map<string, int> indexes; //maps seqName -> index in tree vector
void addNamesToCounts(map<string, string>);
void randomTopology();
if (abort == false) {
if (format == "sharedfile") { delete input; }
else { delete list; }
- delete tmap;
+ delete ct;
}
}
m->runParse = false;
//create treemap class from groupmap for tree class to use
- tmap = new TreeMap();
- tmap->makeSim(m->getAllGroups());
+ ct = new CountTable();
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->getAllGroups().size(); i++) {
+ nameMap.insert(m->getAllGroups()[i]);
+ gps.insert(m->getAllGroups()[i]);
+ groupMap[m->getAllGroups()[i]] = m->getAllGroups()[i];
+ }
+ ct->createTable(nameMap, groupMap, gps);
//clear globaldatas old tree names if any
m->Treenames.clear();
nameMap = new NameAssignment(namefile);
nameMap->readMap();
}
- else{
- nameMap = NULL;
- }
+ else{ nameMap = NULL; }
readMatrix->read(nameMap);
list = readMatrix->getListVector();
SparseDistanceMatrix* dMatrix = readMatrix->getDMatrix();
//make treemap
- tmap = new TreeMap();
-
- if (m->control_pressed) { return 0; }
-
- tmap->makeSim(list);
+ ct = new CountTable();
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < list->getNumBins(); i++) {
+ string bin = list->get(i);
+ nameMap.insert(bin);
+ gps.insert(bin);
+ groupMap[bin] = bin;
+ }
+ ct->createTable(nameMap, groupMap, gps);
- vector<string> namesGroups = tmap->getNamesOfGroups();
+ vector<string> namesGroups = ct->getNamesOfGroups();
m->setGroups(namesGroups);
//clear globaldatas old tree names if any
Tree* TreeGroupCommand::createTree(vector< vector<double> >& simMatrix){
try {
//create tree
- t = new Tree(tmap, simMatrix);
+ t = new Tree(ct, simMatrix);
if (m->control_pressed) { delete t; t = NULL; return t; }
//assemble tree
- map<string, string> empty;
- t->assembleTree(empty);
+ t->assembleTree();
return t;
}
#include "groupmap.h"
#include "validcalculator.h"
#include "tree.h"
-#include "treemap.h"
+#include "counttable.h"
#include "readmatrix.hpp"
#include "readcolumn.h"
#include "readphylip.h"
NameAssignment* nameMap;
ListVector* list;
- TreeMap* tmap;
+ CountTable* ct;
Tree* t;
InputData* input;
vector<Calculator*> treeCalculators;
}
}
-/************************************************************/
+/************************************************************
void TreeMap::setIndex(string seq, int index) {
it = treemap.find(seq);
if (it != treemap.end()) { //sequence name was in group file
treemap[seq].groupname = "not found";
}
}
-/************************************************************/
+/************************************************************
int TreeMap::getIndex(string seq) {
it = treemap.find(seq);
int readMap(string);
int getNumGroups();
int getNumSeqs();
- void setIndex(string, int); //sequencename, index
- int getIndex(string); //returns vector index of sequence
+ //void setIndex(string, int); //sequencename, index
+ //int getIndex(string); //returns vector index of sequence
bool isValidGroup(string); //return true if string is a valid group
void removeSeq(string); //removes a sequence, this is to accomadate trees that do not contain all the seqs in your groupfile
string getGroup(string);
#include "treereader.h"
#include "readtree.h"
+#include "groupmap.h"
/***********************************************************************/
-
-TreeReader::TreeReader(string tf) : treefile(tf) {
+TreeReader::TreeReader(string tf, string cf) : treefile(tf), countfile(cf) {
try {
m = MothurOut::getInstance();
+ ct = new CountTable();
+ ct->readTable(cf);
+
+ //if no groupinfo in count file we need to add it
+ if (!ct->hasGroupInfo()) {
+ ct->addGroup("Group1");
+ vector<string> namesOfSeqs = ct->getNamesOfSeqs();
+ for (int i = 0; i < namesOfSeqs.size(); i++) {
+ ct->setAbund(namesOfSeqs[i], "Group1", ct->getNumSeqs(namesOfSeqs[i]));
+ }
+ }
namefile = "";
groupfile = "";
readTrees();
}
}
/***********************************************************************/
-
-TreeReader::TreeReader(string tf, string gf) : treefile(tf), groupfile(gf) {
- try {
- m = MothurOut::getInstance();
- namefile = "";
- readTrees();
- }
- catch(exception& e) {
- m->errorOut(e, "TreeReader", "TreeReader");
- exit(1);
- }
-}
-/***********************************************************************/
TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf), groupfile(gf), namefile(nf) {
try {
m = MothurOut::getInstance();
+ countfile = "";
+ ct = new CountTable();
+ if (namefile != "") { ct->createTable(namefile, groupfile, true); }
+ else {
+ Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->Treenames.size(); i++) { nameMap.insert(m->Treenames[i]); }
+ if (groupfile == "") { gps.insert("Group1"); for (int i = 0; i < m->Treenames.size(); i++) { groupMap[m->Treenames[i]] = "Group1"; } }
+ else {
+ GroupMap g(groupfile);
+ g.readMap();
+ vector<string> seqs = g.getNamesSeqs();
+ for (int i = 0; i < seqs.size(); i++) {
+ string group = g.getGroup(seqs[i]);
+ groupMap[seqs[i]] = group;
+ gps.insert(group);
+ }
+ }
+ ct->createTable(nameMap, groupMap, gps);
+ }
+
readTrees();
}
catch(exception& e) {
bool TreeReader::readTrees() {
try {
- tmap = new TreeMap();
- if (groupfile != "") { tmap->readMap(groupfile); }
- else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- int numUniquesInName = 0;
- if (namefile != "") { numUniquesInName = readNamesFile(); }
+ int numUniquesInName = ct->getNumUniqueSeqs();
+ //if (namefile != "") { numUniquesInName = readNamesFile(); }
ReadTree* read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
+ int readOk = read->read(ct);
if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete read; m->control_pressed=true; return 0; }
- read->AssembleTrees(names);
+ read->AssembleTrees();
trees = read->getTrees();
delete read;
//if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
int numNamesInTree;
if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
+ if (numUniquesInName == m->Treenames.size()) { numNamesInTree = ct->getNumSeqs(); }
else { numNamesInTree = m->Treenames.size(); }
}else { numNamesInTree = m->Treenames.size(); }
//output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
+ if (numNamesInTree < ct->getNumSeqs()) {
+ vector<string> namesSeqsCt = ct->getNamesOfSeqs();
+ for (int i = 0; i < namesSeqsCt.size(); i++) {
//is that name in the tree?
int count = 0;
for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
+ if (namesSeqsCt[i] == m->Treenames[j]) { break; } //found it
count++;
}
//then you did not find it so report it
if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
+ m->mothurOut(namesSeqsCt[i] + " is in your name or group file and not in your tree. It will be disregarded."); m->mothurOutEndLine();
+ ct->remove(namesSeqsCt[i]);
}
}
}
exit(1);
}
}
-/*****************************************************************/
-int TreeReader::readNamesFile() {
- try {
- nameMap.clear();
- names.clear();
- int numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = nameMap.find(first);
- if (itNames == nameMap.end()) {
- names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = first;
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); nameMap.clear(); names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return numUniquesInName;
- }
- catch(exception& e) {
- m->errorOut(e, "TreeReader", "readNamesFile");
- exit(1);
- }
-}
/***********************************************************************/
#include "mothurout.h"
#include "tree.h"
+#include "counttable.h"
class TreeReader {
public:
- TreeReader(string tf);
- TreeReader(string tf, string gf);
+ TreeReader(string tf, string cf);
TreeReader(string tf, string gf, string nf);
~TreeReader() {}
vector<Tree*> getTrees() { return trees; }
- map<string, string> getNames() { return nameMap; } //dups -> unique
- map<string, string> getNameMap() { return names; } //unique -> dups list
-
private:
MothurOut* m;
vector<Tree*> trees;
- TreeMap* tmap;
- map<string, string> nameMap; //dupName -> uniqueName
- map<string, string> names;
+ CountTable* ct;
+ //map<string, string> nameMap; //dupName -> uniqueName
+ // map<string, string> names;
- string treefile, groupfile, namefile;
+ string treefile, groupfile, namefile, countfile;
bool readTrees();
int readNamesFile();
while(!inOligos.eof()){
inOligos >> type;
-
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading type - " + type + ".\n"); }
+
if(type[0] == '#'){
while (!inOligos.eof()) { char c = inOligos.get(); if (c == 10 || c == 13){ break; } } // get rest of line if there's any crap there
m->gobble(inOligos);
for(int i=0;i<type.length();i++){ type[i] = toupper(type[i]); }
inOligos >> oligo;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading - " + oligo + ".\n"); }
for(int i=0;i<oligo.length();i++){
oligo[i] = toupper(oligo[i]);
map<string, int>::iterator itPrime = primers.find(oligo);
if (itPrime != primers.end()) { m->mothurOut("primer " + oligo + " is in your oligos file already."); m->mothurOutEndLine(); }
+ if (m->debug) { if (group != "") { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); }else{ m->mothurOut("[DEBUG]: no group for primer " + oligo + ".\n"); } }
+
primers[oligo]=indexPrimer; indexPrimer++;
primerNameVector.push_back(group);
}
//then this is illumina data with 4 columns
if (temp != "") {
- string reverseBarcode = reverseOligo(group); //reverse barcode
- group = temp;
+ for(int i=0;i<group.length();i++){
+ group[i] = toupper(group[i]);
+ if(group[i] == 'U') { group[i] = 'T'; }
+ }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: reading reverse " + group + ", and group = " + temp + ".\n"); }
+
+ string reverseBarcode = reverseOligo(group); //reverse barcode
//check for repeat barcodes
map<string, int>::iterator itBar = rbarcodes.find(reverseBarcode);
- if (itBar != rbarcodes.end()) { m->mothurOut("barcode " + reverseBarcode + " is in your oligos file already."); m->mothurOutEndLine(); }
-
+ if (itBar != rbarcodes.end()) { m->mothurOut("reverse barcode " + group + " is in your oligos file already."); m->mothurOutEndLine(); }
+
+ group = temp;
rbarcodes[reverseBarcode]=indexBarcode;
- }
+ }else { if (m->debug) { m->mothurOut("[DEBUG]: reading group " + group + ".\n"); } }
//check for repeat barcodes
map<string, int>::iterator itBar = barcodes.find(oligo);
break;
}
}
-
+
if (allBlank) {
m->mothurOut("[WARNING]: your oligos file does not contain any group names. mothur will not create a groupfile."); m->mothurOutEndLine();
allFiles = false;
vector<string> UnifracUnweightedCommand::setParameters(){
try {
CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
string UnifracUnweightedCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The unifrac.unweighted command parameters are tree, group, name, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
+ helpString += "The unifrac.unweighted command parameters are tree, group, name, count, groups, iters, distance, processors, root and random. tree parameter is required unless you have valid current tree file.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 1 valid group.\n";
helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
helpString += "The distance parameter allows you to create a distance file from the results. The default is false. You may set distance to lt, square or column.\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
consensus = m->isTrue(temp);
if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
- if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
+ if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
+ else {
+ CountTable testCt;
+ if ((!testCt.testGroups(countfile)) && (subsample)) {
+ m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
+ }
+ }
if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
m->setGroups(Groups);
}
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
- }
+ if (countfile=="") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+ }
}
}
m->setTreeFile(treefile);
- TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ TreeReader* reader;
+ if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+ else { reader = new TreeReader(treefile, countfile); }
T = reader->getTrees();
- tmap = T[0]->getTreeMap();
- map<string, string> nameMap = reader->getNames();
- map<string, string> unique2Dup = reader->getNameMap();
- delete reader;
+ ct = T[0]->getCountTable();
+ delete reader;
sumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + getOutputFileNameTag("uwsummary");
outputNames.push_back(sumFile); outputTypes["uwsummary"].push_back(sumFile);
SharedUtil util;
Groups = m->getGroups();
- vector<string> namesGroups = tmap->getNamesOfGroups();
+ vector<string> namesGroups = ct->getNamesOfGroups();
util.setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze
Unweighted unweighted(includeRoot);
//user has not set size, set size = smallest samples size
if (subsampleSize == -1) {
vector<string> temp; temp.push_back(Groups[0]);
- subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+ subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
for (int i = 1; i < Groups.size(); i++) {
- temp.clear(); temp.push_back(Groups[i]);
- int thisSize = (tmap->getNamesSeqs(temp)).size();
+ int thisSize = ct->getGroupCount(Groups[i]);
if (thisSize < subsampleSize) { subsampleSize = thisSize; }
}
m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
vector<string> newGroups = Groups;
Groups.clear();
for (int i = 0; i < newGroups.size(); i++) {
- vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
- vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
+ int thisSize = ct->getGroupCount(newGroups[i]);
if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
//get pscores for users trees
for (int i = 0; i < T.size(); i++) {
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
counter = 0;
userData = unweighted.getValues(T[i], processors, outputDir); //userData[0] = unweightedscore
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
//output scores for each combination
for(int k = 0; k < numComp; k++) {
if (random) { runRandomCalcs(T[i], userData); }
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
int startSubsample = time(NULL);
if (m->control_pressed) { break; }
//copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
- TreeMap* newTmap = new TreeMap();
- //newTmap->getCopy(*tmap);
-
- //SubSample sample;
- //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
-
+ CountTable* newCt = new CountTable();
+
//uses method of setting groups to doNotIncludeMe
SubSample sample;
- Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
+ Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
//call new weighted function
vector<double> iterData; iterData.resize(numComp,0);
Unweighted thisUnweighted(includeRoot);
iterData = thisUnweighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore
-
+
//save data to make ave dist, std dist
calcDistsTotals.push_back(iterData);
- delete newTmap;
+ delete newCt;
delete subSampleTree;
if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
}
- m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine();
+ if (subsample) { m->mothurOut("It took " + toString(time(NULL) - startSubsample) + " secs to run the subsampling."); m->mothurOutEndLine(); }
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; }if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
if (consensus) { getConsensusTrees(calcDistsTotals, i); }
outSum.close();
- delete tmap;
+ delete ct;
for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//find standard deviation
vector<double> stdDev; stdDev.resize(numComp, 0);
- for (int thisIter = 0; thisIter < iters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
+ for (int thisIter = 0; thisIter < subsampleIters; thisIter++) { //compute the difference of each dist from the mean, and square the result of each
for (int j = 0; j < dists[thisIter].size(); j++) {
stdDev[j] += ((dists[thisIter][j] - averages[j]) * (dists[thisIter][j] - averages[j]));
}
m->runParse = false;
//create treemap class from groupmap for tree class to use
- TreeMap newTmap;
- newTmap.makeSim(m->getGroups());
+ CountTable newCt;
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->getGroups().size(); i++) {
+ nameMap.insert(m->getGroups()[i]);
+ gps.insert(m->getGroups()[i]);
+ groupMap[m->getGroups()[i]] = m->getGroups()[i];
+ }
+ newCt.createTable(nameMap, groupMap, gps);
//clear old tree names if any
m->Treenames.clear();
//fills globaldatas tree names
m->Treenames = m->getGroups();
- vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+ vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
if (m->control_pressed) { return 0; }
}
/**************************************************************************************************/
-vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
+vector<Tree*> UnifracUnweightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
try {
vector<Tree*> trees;
}
//create tree
- Tree* tempTree = new Tree(&mytmap, sims);
- map<string, string> empty;
- tempTree->assembleTree(empty);
+ Tree* tempTree = new Tree(&myct, sims);
+ tempTree->assembleTree();
trees.push_back(tempTree);
#include "command.hpp"
#include "unweighted.h"
-#include "treemap.h"
+#include "counttable.h"
#include "sharedutilities.h"
#include "fileoutput.h"
#include "readtree.h"
private:
FileOutput* output;
vector<Tree*> T; //user trees
- TreeMap* tmap;
+ CountTable* ct;
string sumFile, allGroups;
vector<string> groupComb; // AB. AC, BC...
int iters, numGroups, numComp, counter, processors, subsampleSize, subsampleIters;
vector< map<float, float> > rCumul; //map <unweighted score, cumulative percentage of number of random trees with that score or higher.> -vector entry for each combination.
bool abort, phylip, random, includeRoot, consensus, subsample;
- string groups, itersString, outputDir, outputForm, treefile, groupfile, namefile;
+ string groups, itersString, outputDir, outputForm, treefile, groupfile, namefile, countfile;
vector<string> Groups, outputNames; //holds groups to be used
ofstream outSum, out;
void printUWSummaryFile(int);
void printUnweightedFile();
void createPhylipFile(int);
- vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap&);
+ vector<Tree*> buildTrees(vector< vector<double> >&, int, CountTable&);
int getConsensusTrees(vector< vector<double> >&, int);
int getAverageSTDMatrices(vector< vector<double> >&, int);
vector<string> UnifracWeightedCommand::setParameters(){
try {
CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
- CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters);
CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors);
string UnifracWeightedCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The unifrac.weighted command parameters are tree, group, name, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
+ helpString += "The unifrac.weighted command parameters are tree, group, name, count, groups, iters, distance, processors, root, subsample, consensus and random. tree parameter is required unless you have valid current tree file.\n";
helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups.\n";
helpString += "The group names are separated by dashes. The iters parameter allows you to specify how many random trees you would like compared to your tree.\n";
helpString += "The distance parameter allows you to create a distance file from the results. The default is false.\n";
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["name"] = inputDir + it->second; }
}
+
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
}
//check for required parameters
else if (namefile == "not found") { namefile = ""; }
else { m->setNameFile(namefile); }
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not open") { countfile = ""; abort = true; }
+ else if (countfile == "not found") { countfile = ""; }
+ else { m->setCountTableFile(countfile); }
+
+ if ((namefile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
+ }
+
+ if ((groupfile != "") && (countfile != "")) {
+ m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
+ }
+
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
consensus = m->isTrue(temp);
if (subsample && random) { m->mothurOut("[ERROR]: random must be false, if subsample=t.\n"); abort=true; }
- if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; }
+ if (countfile == "") { if (subsample && (groupfile == "")) { m->mothurOut("[ERROR]: if subsample=t, a group file must be provided.\n"); abort=true; } }
+ else {
+ CountTable testCt;
+ if ((!testCt.testGroups(countfile)) && (subsample)) {
+ m->mothurOut("[ERROR]: if subsample=t, a count file with group info must be provided.\n"); abort=true;
+ }
+ }
if (subsample && (!phylip)) { phylip=true; outputForm = "lt"; }
if (consensus && (!subsample)) { m->mothurOut("[ERROR]: you cannot use consensus without subsample.\n"); abort=true; }
- if (namefile == "") {
- vector<string> files; files.push_back(treefile);
- parser.getNameFile(files);
- }
+ if (countfile=="") {
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
+ }
}
m->setTreeFile(treefile);
- TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ TreeReader* reader;
+ if (countfile == "") { reader = new TreeReader(treefile, groupfile, namefile); }
+ else { reader = new TreeReader(treefile, countfile); }
T = reader->getTrees();
- tmap = T[0]->getTreeMap();
- map<string, string> nameMap = reader->getNames();
- map<string, string> unique2Dup = reader->getNameMap();
+ ct = T[0]->getCountTable();
delete reader;
-
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary");
m->openOutputFile(sumFile, outSum);
SharedUtil util;
string s; //to make work with setgroups
Groups = m->getGroups();
- vector<string> nameGroups = tmap->getNamesOfGroups();
+ vector<string> nameGroups = ct->getNamesOfGroups();
util.setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze
m->setGroups(Groups);
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; }
Weighted weighted(includeRoot);
//user has not set size, set size = smallest samples size
if (subsampleSize == -1) {
vector<string> temp; temp.push_back(Groups[0]);
- subsampleSize = (tmap->getNamesSeqs(temp)).size(); //num in first group
+ subsampleSize = ct->getGroupCount(Groups[0]); //num in first group
for (int i = 1; i < Groups.size(); i++) {
- temp.clear(); temp.push_back(Groups[i]);
- int thisSize = (tmap->getNamesSeqs(temp)).size();
+ int thisSize = ct->getGroupCount(Groups[i]);
if (thisSize < subsampleSize) { subsampleSize = thisSize; }
}
m->mothurOut("\nSetting subsample size to " + toString(subsampleSize) + ".\n\n");
vector<string> newGroups = Groups;
Groups.clear();
for (int i = 0; i < newGroups.size(); i++) {
- vector<string> thisGroup; thisGroup.push_back(newGroups[i]);
- vector<string> thisGroupsSeqs = tmap->getNamesSeqs(thisGroup);
- int thisSize = thisGroupsSeqs.size();
+ int thisSize = ct->getGroupCount(newGroups[i]);
if (thisSize >= subsampleSize) { Groups.push_back(newGroups[i]); }
- else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
+ else { m->mothurOut("You have selected a size that is larger than "+newGroups[i]+" number of sequences, removing "+newGroups[i]+".\n"); }
}
m->setGroups(Groups);
}
//get weighted scores for users trees
for (int i = 0; i < T.size(); i++) {
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
counter = 0;
rScores.resize(numComp); //data[0] = weightedscore AB, data[1] = weightedscore AC...
}
userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//save users score
for (int s=0; s<numComp; s++) {
if (m->control_pressed) { break; }
//copy to preserve old one - would do this in subsample but memory cleanup becomes messy.
- TreeMap* newTmap = new TreeMap();
- //newTmap->getCopy(*tmap);
-
- //SubSample sample;
- //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize);
+ CountTable* newCt = new CountTable();
//uses method of setting groups to doNotIncludeMe
SubSample sample;
- Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup);
-
+ Tree* subSampleTree = sample.getSample(T[i], ct, newCt, subsampleSize);
+
//call new weighted function
vector<double> iterData; iterData.resize(numComp,0);
Weighted thisWeighted(includeRoot);
//save data to make ave dist, std dist
calcDistsTotals.push_back(iterData);
- delete newTmap;
+ delete newCt;
delete subSampleTree;
if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); }
}
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (subsample) { getAverageSTDMatrices(calcDistsTotals, i); }
if (consensus) { getConsensusTrees(calcDistsTotals, i); }
}
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
if (phylip) { createPhylipFile(); }
//clear out users groups
m->clearGroups();
- delete tmap;
+ delete ct;
for (int i = 0; i < T.size(); i++) { delete T[i]; }
if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//used in tree constructor
m->runParse = false;
- //create treemap class from groupmap for tree class to use
- TreeMap newTmap;
- newTmap.makeSim(m->getGroups());
+ ///create treemap class from groupmap for tree class to use
+ CountTable newCt;
+ set<string> nameMap;
+ map<string, string> groupMap;
+ set<string> gps;
+ for (int i = 0; i < m->getGroups().size(); i++) {
+ nameMap.insert(m->getGroups()[i]);
+ gps.insert(m->getGroups()[i]);
+ groupMap[m->getGroups()[i]] = m->getGroups()[i];
+ }
+ newCt.createTable(nameMap, groupMap, gps);
//clear old tree names if any
m->Treenames.clear();
//fills globaldatas tree names
m->Treenames = m->getGroups();
- vector<Tree*> newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created
+ vector<Tree*> newTrees = buildTrees(dists, treeNum, newCt); //also creates .all.tre file containing the trees created
if (m->control_pressed) { return 0; }
}
/**************************************************************************************************/
-vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, TreeMap& mytmap) {
+vector<Tree*> UnifracWeightedCommand::buildTrees(vector< vector<double> >& dists, int treeNum, CountTable& myct) {
try {
vector<Tree*> trees;
}
//create tree
- Tree* tempTree = new Tree(&mytmap, sims);
- map<string, string> empty;
- tempTree->assembleTree(empty);
+ Tree* tempTree = new Tree(&myct, sims);
+ tempTree->assembleTree();
trees.push_back(tempTree);
//get scores for random trees
for (int j = 0; j < iters; j++) {
-
+ cout << j << endl;
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores);
#endif
- if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+ if (m->control_pressed) { delete ct; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//report progress
// m->mothurOut("Iter: " + toString(j+1)); m->mothurOutEndLine();
/**************************************************************************************************/
int UnifracWeightedCommand::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, vector< vector<double> >& scores) {
try {
- Tree* randT = new Tree(tmap);
+ Tree* randT = new Tree(ct);
Weighted weighted(includeRoot);
#include "command.hpp"
#include "weighted.h"
-#include "treemap.h"
+#include "counttable.h"
#include "progress.hpp"
#include "sharedutilities.h"
#include "fileoutput.h"
linePair(int i, int j) : start(i), num(j) {}
};
vector<linePair> lines;
- TreeMap* tmap;
+ CountTable* ct;
FileOutput* output;
vector<Tree*> T; //user trees
vector<double> utreeScores; //user tree unweighted scores
map<float, float> validScores; //map contains scores from random
bool abort, phylip, random, includeRoot, subsample, consensus;
- string groups, itersString, outputForm, treefile, groupfile, namefile;
+ string groups, itersString, outputForm, treefile, groupfile, namefile, countfile;
vector<string> Groups, outputNames; //holds groups to be used
int processors, subsampleSize, subsampleIters;
ofstream outSum;
int createProcesses(Tree*, vector< vector<string> >, vector< vector<double> >&);
int driver(Tree*, vector< vector<string> >, int, int, vector< vector<double> >&);
int runRandomCalcs(Tree*, vector<double>);
- vector<Tree*> buildTrees(vector< vector<double> >&, int, TreeMap&);
+ vector<Tree*> buildTrees(vector< vector<double> >&, int, CountTable&);
int getConsensusTrees(vector< vector<double> >&, int);
int getAverageSTDMatrices(vector< vector<double> >&, int);
processors = p;
outputDir = o;
- TreeMap* tmap = t->getTreeMap();
+ CountTable* ct = t->getCountTable();
//if the users enters no groups then give them the score of all groups
int numGroups = m->getNumGroups();
vector<string> groups;
if (numGroups == 0) {
//get score for all users groups
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- if ((tmap->getNamesOfGroups())[i] != "xxx") {
- groups.push_back((tmap->getNamesOfGroups())[i]);
+ for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+ if ((ct->getNamesOfGroups())[i] != "xxx") {
+ groups.push_back((ct->getNamesOfGroups())[i]);
}
}
namesOfGroupCombos.push_back(groups);
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos, tmap);
+ data = createProcesses(t, namesOfGroupCombos, ct);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) {
try {
processors = p;
outputDir = o;
- TreeMap* tmap = t->getTreeMap();
+ CountTable* ct = t->getCountTable();
//if the users enters no groups then give them the score of all groups
int numGroups = m->getNumGroups();
vector<string> groups;
if (numGroups == 0) {
//get score for all users groups
- for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) {
- if ((tmap->getNamesOfGroups())[i] != "xxx") {
- groups.push_back((tmap->getNamesOfGroups())[i]);
+ for (int i = 0; i < (ct->getNamesOfGroups()).size(); i++) {
+ if ((ct->getNamesOfGroups())[i] != "xxx") {
+ groups.push_back((ct->getNamesOfGroups())[i]);
}
}
namesOfGroupCombos.push_back(groups);
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos, true, tmap);
+ data = createProcesses(t, namesOfGroupCombos, true, ct);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, TreeMap* tmap) {
+EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
process++;
}else if (pid == 0){
EstOutput myresults;
- myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, tmap);
+ myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, usingGroups, ct);
if (m->control_pressed) { exit(0); }
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, tmap);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, TreeMap* tmap) {
+EstOutput Unweighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, bool usingGroups, CountTable* ct) {
try {
EstOutput results; results.resize(num);
int count = 0;
- Tree* copyTree = new Tree(tmap);
+ Tree* copyTree = new Tree(ct);
for (int h = start; h < (start+num); h++) {
*/
#include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
/***********************************************************************/
map< vector<string>, set<int> > rootForGrouping; //maps a grouping combo to the roots for that combo
bool includeRoot;
- EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
- EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
- EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, TreeMap*);
- EstOutput createProcesses(Tree*, vector< vector<string> >, bool, TreeMap*);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, bool, CountTable*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, bool, CountTable*);
int getRoot(Tree*, int, vector<string>);
};
if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ". I suspect you entered a column formatted file as a phylip file, aborting."); m->mothurOutEndLine(); return "not found"; }
}
+
+ //check for blank file
+ if (ableToOpen != 1) {
+ if (m->isBlank(container[parameter])) {
+ m->mothurOut("[ERROR]: " + container[parameter] + " is blank, aborting."); m->mothurOutEndLine(); return "not found";
+ }
+ }
+
}
}else { return "not found"; }
processors = p;
outputDir = o;
- TreeMap* tmap = t->getTreeMap();
+ CountTable* ct = t->getCountTable();
numGroups = m->getNumGroups();
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
}else{
int numPairs = namesOfGroupCombos.size();
lines.push_back(linePair(startPos, numPairsPerProcessor));
}
- data = createProcesses(t, namesOfGroupCombos, tmap);
+ data = createProcesses(t, namesOfGroupCombos, ct);
lines.clear();
}
#else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), tmap);
+ data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
#endif
return data;
}
/**************************************************************************************************/
-EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, TreeMap* tmap) {
+EstOutput Weighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
try {
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
}else if (pid == 0){
EstOutput Myresults;
- Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, tmap);
+ Myresults = driver(t, namesOfGroupCombos, lines[process].start, lines[process].num, ct);
//m->mothurOut("Merging results."); m->mothurOutEndLine();
}
}
- results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, tmap);
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
//force parent to wait until all the processes are done
for (int i=0;i<(processors-1);i++) {
}
}
/**************************************************************************************************/
-EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, TreeMap* tmap) {
+EstOutput Weighted::driver(Tree* t, vector< vector<string> > namesOfGroupCombos, int start, int num, CountTable* ct) {
try {
EstOutput results;
vector<double> D;
int numSeqsInGroupI = it->second;
double sum = getLengthToRoot(t, t->groupNodeInfo[groupA][j], groupA, groupB);
- double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groupA]);
+ double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groupA));
D[count] += weightedSum;
}
int numSeqsInGroupL = it->second;
double sum = getLengthToRoot(t, t->groupNodeInfo[groupB][j], groupA, groupB);
- double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groupB]);
+ double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groupB));
D[count] += weightedSum;
}
it = t->tree[i].pcount.find(groupA);
//if it does u = # of its descendants with a certain group / total number in tree with a certain group
if (it != t->tree[i].pcount.end()) {
- u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
+ u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA);
}else { u = 0.00; }
//if it does subtract their percentage from u
if (it != t->tree[i].pcount.end()) {
- u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
+ u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB);
}
if (includeRoot) {
data.clear(); //clear out old values
- TreeMap* tmap = t->getTreeMap();
+ CountTable* ct = t->getCountTable();
if (m->control_pressed) { return data; }
int numSeqsInGroupI = it->second;
double sum = getLengthToRoot(t, t->groupNodeInfo[groups[0]][j], groups[0], groups[1]);
- double weightedSum = ((numSeqsInGroupI * sum) / (double)tmap->seqsPerGroup[groups[0]]);
+ double weightedSum = ((numSeqsInGroupI * sum) / (double)ct->getGroupCount(groups[0]));
D += weightedSum;
}
int numSeqsInGroupL = it->second;
double sum = getLengthToRoot(t, t->groupNodeInfo[groups[1]][j], groups[0], groups[1]);
- double weightedSum = ((numSeqsInGroupL * sum) / (double)tmap->seqsPerGroup[groups[1]]);
+ double weightedSum = ((numSeqsInGroupL * sum) / (double)ct->getGroupCount(groups[1]));
D += weightedSum;
}
it = t->tree[i].pcount.find(groupA);
//if it does u = # of its descendants with a certain group / total number in tree with a certain group
if (it != t->tree[i].pcount.end()) {
- u = (double) t->tree[i].pcount[groupA] / (double) tmap->seqsPerGroup[groupA];
+ u = (double) t->tree[i].pcount[groupA] / (double) ct->getGroupCount(groupA);
}else { u = 0.00; }
it = t->tree[i].pcount.find(groupB);
//if it does subtract their percentage from u
if (it != t->tree[i].pcount.end()) {
- u -= (double) t->tree[i].pcount[groupB] / (double) tmap->seqsPerGroup[groupB];
+ u -= (double) t->tree[i].pcount[groupB] / (double) ct->getGroupCount(groupB);
}
if (includeRoot) {
*/
#include "treecalculator.h"
-#include "treemap.h"
+#include "counttable.h"
/***********************************************************************/
map< vector<string>, set<int> > rootForGrouping; //maps a grouping combo to the root for that combo
bool includeRoot;
- EstOutput driver(Tree*, vector< vector<string> >, int, int, TreeMap*);
- EstOutput createProcesses(Tree*, vector< vector<string> >, TreeMap*);
+ EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*);
+ EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
double getLengthToRoot(Tree*, int, string, string);
};