*/
#include "phylodiversitycommand.h"
+#include "treereader.h"
//**********************************************************************************************************************
vector<string> PhyloDiversityCommand::setParameters(){
}
}
- m->runParse = true;
- m->Groups.clear();
- m->namesOfGroups.clear();
- m->Treenames.clear();
- m->names.clear();
-
//check for required parameters
treefile = validParameter.validFile(parameters, "tree", true);
- if (treefile == "not open") { abort = true; }
+ if (treefile == "not open") { treefile = ""; abort = true; }
else if (treefile == "not found") {
//if there is a current tree file, use it
treefile = m->getTreeFile();
if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); }
else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; }
- }
+ }else { m->setTreeFile(treefile); }
//check for optional parameters (group and name files may be omitted)
groupfile = validParameter.validFile(parameters, "group", true);
if (groupfile == "not open") { groupfile = ""; abort = true; }
else if (groupfile == "not found") { groupfile = ""; }
+ else { m->setGroupFile(groupfile); }
namefile = validParameter.validFile(parameters, "name", true);
- if (namefile == "not open") { abort = true; }
+ if (namefile == "not open") { namefile = ""; abort = true; }
else if (namefile == "not found") { namefile = ""; }
+ else { m->setNameFile(namefile); }
outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(treefile); }
string temp;
temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; }
- convert(temp, freq);
+ m->mothurConvert(temp, freq);
temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; }
- convert(temp, iters);
+ m->mothurConvert(temp, iters);
temp = validParameter.validFile(parameters, "rarefy", false); if (temp == "not found") { temp = "F"; }
rarefy = m->isTrue(temp);
temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
m->setProcessors(temp);
- convert(temp, processors);
+ m->mothurConvert(temp, processors);
groups = validParameter.validFile(parameters, "groups", false);
if (groups == "not found") { groups = ""; }
else {
m->splitAtDash(groups, Groups);
- m->Groups = Groups;
+ m->setGroups(Groups);
}
if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; }
+
+ if (namefile == "") {
+ vector<string> files; files.push_back(treefile);
+ parser.getNameFile(files);
+ }
}
}
if (abort == true) { if (calledHelp) { return 0; } return 2; }
m->setTreeFile(treefile);
-
- if (groupfile != "") {
- //read in group map info.
- tmap = new TreeMap(groupfile);
- tmap->readMap();
- }else{ //fake out by putting everyone in one group
- Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap
- tmap = new TreeMap();
-
- for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); }
- }
-
- if (namefile != "") { readNamesFile(); }
-
- read = new ReadNewickTree(treefile);
- int readOk = read->read(tmap);
-
- if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; }
-
- read->AssembleTrees();
- vector<Tree*> trees = read->getTrees();
- delete read;
-
- //make sure all files match
- //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
- int numNamesInTree;
- if (namefile != "") {
- if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); }
- else { numNamesInTree = m->Treenames.size(); }
- }else { numNamesInTree = m->Treenames.size(); }
-
-
- //output any names that are in group file but not in tree
- if (numNamesInTree < tmap->getNumSeqs()) {
- for (int i = 0; i < tmap->namesOfSeqs.size(); i++) {
- //is that name in the tree?
- int count = 0;
- for (int j = 0; j < m->Treenames.size(); j++) {
- if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it
- count++;
- }
-
- if (m->control_pressed) {
- delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; }
- for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } outputTypes.clear();
- m->Groups.clear();
- return 0;
- }
-
- //then you did not find it so report it
- if (count == m->Treenames.size()) {
- //if it is in your namefile then don't remove
- map<string, string>::iterator it = nameMap.find(tmap->namesOfSeqs[i]);
-
- if (it == nameMap.end()) {
- m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine();
- tmap->removeSeq(tmap->namesOfSeqs[i]);
- i--; //need this because removeSeq removes name from namesOfSeqs
- }
- }
- }
- }
-
- SharedUtil* util = new SharedUtil();
- util->setGroups(m->Groups, tmap->namesOfGroups, "phylo.diversity"); //sets the groups the user wants to analyze
- delete util;
+ TreeReader* reader = new TreeReader(treefile, groupfile, namefile);
+ vector<Tree*> trees = reader->getTrees();
+ tmap = trees[0]->getTreeMap();
+ delete reader;
+
+ SharedUtil util;
+ vector<string> mGroups = m->getGroups();
+ vector<string> tGroups = tmap->getNamesOfGroups();
+ util.setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze
//in case the user had some mismatches between the tree and group files, we don't want group xxx to be analyzed
- for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } }
+ for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } }
+ m->setGroups(mGroups);
vector<string> outputNames;
//for each of the users trees
for(int i = 0; i < trees.size(); i++) {
- if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { remove(outputNames[j].c_str()); } return 0; }
+ if (m->control_pressed) { delete tmap; for (int j = 0; j < trees.size(); j++) { delete trees[j]; } for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; }
ofstream outSum, outRare, outCollect;
string outSumFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(i+1) + ".phylodiv.summary";
//create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator
vector<int> randomLeaf;
for (int j = 0; j < numLeafNodes; j++) {
- if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected.
+ if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected.
randomLeaf.push_back(j);
}
}
//find largest group total
int largestGroup = 0;
- for (int j = 0; j < m->Groups.size(); j++) {
- if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; }
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; }
//initialize diversity
- diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled
+ diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); //numSampled
//groupA 0.0 0.0
//initialize sumDiversity
- sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0);
+ sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0);
}
//convert freq percentage to number
if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); }
//add other groups ending points
- for (int j = 0; j < m->Groups.size(); j++) {
- if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); }
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); }
}
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
if(processors == 1){
driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
}else{
}
- if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
m->mothurOutEndLine();
m->mothurOut("Output File Names: "); m->mothurOutEndLine();
//**********************************************************************************************************************
int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
int process = 1;
vector<int> processIDS;
}
in.close();
- remove(inTemp.c_str());
+ m->mothurRemove(inTemp);
}
#endif
int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){
try {
int numLeafNodes = randomLeaf.size();
+ vector<string> mGroups = m->getGroups();
for (int l = 0; l < numIters; l++) {
random_shuffle(randomLeaf.begin(), randomLeaf.end());
//initialize counts
map<string, int> counts;
map< string, set<int> > countedBranch;
- for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
+ for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2); } //add dummy index to initialize countedBranch sets
for(int k = 0; k < numLeafNodes; k++){
if (rarefy) {
//add this diversity to the sum
- for (int j = 0; j < m->Groups.size(); j++) {
- for (int g = 0; g < div[m->Groups[j]].size(); g++) {
- sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g];
+ for (int j = 0; j < mGroups.size(); j++) {
+ for (int g = 0; g < div[mGroups[j]].size(); g++) {
+ sumDiv[mGroups[j]][g] += div[mGroups[j]][g];
}
}
}
out << "Groups\tnumSampled\tphyloDiversity" << endl;
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
-
- for (int j = 0; j < m->Groups.size(); j++) {
- int numSampled = (div[m->Groups[j]].size()-1);
- out << m->Groups[j] << '\t' << numSampled << '\t';
+
+ vector<string> mGroups = m->getGroups();
+ for (int j = 0; j < mGroups.size(); j++) {
+ int numSampled = (div[mGroups[j]].size()-1);
+ out << mGroups[j] << '\t' << numSampled << '\t';
float score;
- if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
- else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
+ if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
+ else { score = div[mGroups[j]][numSampled] / (float)numIters; }
out << setprecision(4) << score << endl;
}
try {
out << "numSampled\t";
- for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; }
+ vector<string> mGroups = m->getGroups();
+ for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t'; }
out << endl;
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
int numSampled = *it;
out << numSampled << '\t';
-
- for (int j = 0; j < m->Groups.size(); j++) {
- if (numSampled < div[m->Groups[j]].size()) {
+
+ for (int j = 0; j < mGroups.size(); j++) {
+ if (numSampled < div[mGroups[j]].size()) {
float score;
- if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; }
- else { score = div[m->Groups[j]][numSampled] / (float)numIters; }
+ if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; }
+ else { score = div[mGroups[j]][numSampled] / (float)numIters; }
out << setprecision(4) << score << '\t';
}else { out << "NA" << '\t'; }
exit(1);
}
}
-/*****************************************************************/
-int PhyloDiversityCommand::readNamesFile() {
- try {
- m->names.clear();
- numUniquesInName = 0;
-
- ifstream in;
- m->openInputFile(namefile, in);
-
- string first, second;
- map<string, string>::iterator itNames;
-
- while(!in.eof()) {
- in >> first >> second; m->gobble(in);
-
- numUniquesInName++;
-
- itNames = m->names.find(first);
- if (itNames == m->names.end()) {
- m->names[first] = second;
-
- //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
- vector<string> dupNames;
- m->splitAtComma(second, dupNames);
-
- for (int i = 0; i < dupNames.size(); i++) {
- nameMap[dupNames[i]] = dupNames[i];
- if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); }
- }
- }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; }
- }
- in.close();
-
- return 0;
- }
- catch(exception& e) {
- m->errorOut(e, "PhyloDiversityCommand", "readNamesFile");
- exit(1);
- }
-}
-
//**********************************************************************************************************************