vector<string> Estimators, Groups; //holds estimators to be used
set<string> labels; //holds labels to be used
vector<string> Treenames;
+ map<string, string> names;
string getPhylipFile();
if (path.rfind("./") == -1) { return fileName; } //already complete name
else { newFileName = fileName.substr(fileName.rfind("./")+2); } //save the complete part of the name
- char* cwdpath;
+ char* cwdpath = new char[1024];
size_t size;
cwdpath=getcwd(cwdpath,size);
cwd = cwdpath;
void NameAssignment::print(ostream& out){
try {
map<string,int>::iterator it;
-cout << (*this).size() << endl;
+//cout << (*this).size() << endl;
for(it = (*this).begin(); it!=(*this).end(); it++){
out << it->first << '\t' << it->second << endl; //prints out keys and values of the map this.
}
else {
//valid paramters for this command
- string Array[] = {"tree","group","outputdir","inputdir"};
+ string Array[] = {"tree","group","name","outputdir","inputdir"};
vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
OptionParser parser(option);
//if the user has not given a path then, add inputdir. else leave path alone.
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+
+ it = parameters.find("name");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["name"] = inputDir + it->second; }
+ }
+
}
globaldata->gTreemap = treeMap;
}
+ namefile = validParameter.validFile(parameters, "name", true);
+ if (namefile == "not open") { abort = true; }
+ else if (namefile == "not found") { treefile = ""; }
+ else { readNamesFile(); }
+
if (abort == false) {
filename = treefile;
read = new ReadNewickTree(filename);
T[i]->assembleTree();
}
- //output any names that are in names file but not in tree
+ //output any names that are in group file but not in tree
if (globaldata->Treenames.size() < treeMap->getNumSeqs()) {
for (int i = 0; i < treeMap->namesOfSeqs.size(); i++) {
//is that name in the tree?
//then you did not find it so report it
if (count == globaldata->Treenames.size()) {
- mothurOut(treeMap->namesOfSeqs[i] + " is in your namefile and not in your tree. It will be disregarded."); mothurOutEndLine();
- treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+ //if it is in your namefile then don't remove
+ map<string, string>::iterator it = nameMap.find(treeMap->namesOfSeqs[i]);
+
+ if (it == nameMap.end()) {
+ mothurOut(treeMap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); mothurOutEndLine();
+ treeMap->removeSeq(treeMap->namesOfSeqs[i]);
+ }
}
}
}
exit(1);
}
}
+/*****************************************************************/
+int ReadTreeCommand::readNamesFile() {
+ try {
+ globaldata->names.clear();
+
+ ifstream in;
+ openInputFile(namefile, in);
+
+ string first, second;
+ map<string, string>::iterator itNames;
+
+ while(!in.eof()) {
+ in >> first >> second; gobble(in);
+
+ itNames = globaldata->names.find(first);
+ if (itNames == globaldata->names.end()) {
+ globaldata->names[first] = second;
+
+ //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them
+ vector<string> dupNames;
+ splitAtComma(second, dupNames);
+
+ for (int i = 0; i < dupNames.size(); i++) { nameMap[dupNames[i]] = dupNames[i]; }
+ }else { mothurOut(first + " has already been seen in namefile, disregarding names file."); mothurOutEndLine(); in.close(); globaldata->names.clear(); return 1; }
+ }
+ in.close();
+
+ return 0;
+ }
+ catch(exception& e) {
+ errorOut(e, "ReadTreeCommand", "readNamesFile");
+ exit(1);
+ }
+}
//**********************************************************************************************************************
GlobalData* globaldata;
ReadTree* read;
TreeMap* treeMap;
- string filename, treefile, groupfile;
+ string filename, treefile, groupfile, namefile;
bool abort;
+ map<string, string> nameMap;
+
+ int readNamesFile();
};
/*****************************************************************/
Tree::~Tree() {}
/*****************************************************************/
+void Tree::addNamesToCounts() {
+ try {
+ //ex. seq1 seq2,seq3,se4
+ // seq1 = pasture
+ // seq2 = forest
+ // seq4 = pasture
+ // seq3 = ocean
+
+ //before this function seq1.pcount = pasture -> 1
+ //after seq1.pcount = pasture -> 2, forest -> 1, ocean -> 1
+
+ //before this function seq1.pgroups = pasture -> 1
+ //after seq1.pgroups = pasture -> 1 since that is the dominant group
+
+
+ //go through each leaf and update its pcounts and pgroups
+ for (int i = 0; i < numLeaves; i++) {
+ string name = tree[i].getName();
+
+ map<string, string>::iterator itNames = globaldata->names.find(name);
+
+ if (itNames == globaldata->names.end()) { mothurOut(name + " is not in your name file, please correct."); mothurOutEndLine(); exit(1); }
+ else {
+ vector<string> dupNames;
+ splitAtComma(globaldata->names[name], dupNames);
+
+ map<string, int>::iterator itCounts;
+ int maxPars = 1;
+ for (int j = 0; j < dupNames.size(); j++) {
+
+ if (dupNames[j] != name) {//you already added yourself in the constructor
+ string group = globaldata->gTreemap->getGroup(dupNames[j]);
+
+ //update pcounts
+ itCounts = tree[i].pcount.find(group);
+ if (itCounts == tree[i].pcount.end()) { //new group, add it
+ tree[i].pcount[group] = 1;
+ }else {
+ tree[i].pcount[group]++;
+ }
+
+ //update pgroups
+ itCounts = tree[i].pGroups.find(group);
+ if (itCounts == tree[i].pGroups.end()) { //new group, add it
+ tree[i].pGroups[group] = 1;
+ }else {
+ tree[i].pGroups[group]++;
+ }
+
+ //keep highest group
+ if(tree[i].pGroups[group] > maxPars){
+ maxPars = tree[i].pGroups[group];
+ }
+ }//end if
+ }//end for
+
+ if (maxPars > 1) { //then we have some more dominant groups
+ //erase all the groups that are less than maxPars because you found a more dominant group.
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();){
+ if(it->second < maxPars){
+ tree[i].pGroups.erase(it++);
+ }else { it++; }
+ }
+ //set one remaining groups to 1
+ for(it=tree[i].pGroups.begin();it!=tree[i].pGroups.end();it++){
+ tree[i].pGroups[it->first] = 1;
+ }
+ }//end if
+
+ }//end else
+ }//end for
+
+ }
+ catch(exception& e) {
+ errorOut(e, "Tree", "addNamesToCounts");
+ exit(1);
+ }
+}
+/*****************************************************************/
int Tree::getIndex(string searchName) {
try {
//Treemap knows name, group and index to speed up search
/*****************************************************************/
void Tree::assembleTree() {
try {
+
+ //if user has given a names file we want to include that info in the pgroups and pcount info.
+ if(globaldata->names.size() != 0) { addNamesToCounts(); }
+
//build the pGroups in non leaf nodes to be used in the parsimony calcs.
for (int i = numLeaves; i < numNodes; i++) {
tree[i].pGroups = (mergeGroups(i));
//copy pcount
tree[i].pcount = copy->tree[i].pcount;
}
+
}
catch(exception& e) {
errorOut(e, "Tree", "getCopy");
int findRoot(); //return index of root node
//this function takes the leaf info and populates the non leaf nodes
- void assembleTree();
+ void assembleTree();
vector<Node> tree; //the first n nodes are the leaves, where n is the number of sequences.
+
private:
GlobalData* globaldata;
int numNodes, numLeaves;
map<string, int>::iterator it, it2;
map<string, int> mergeGroups(int); //returns a map with a groupname and the number of times that group was seen in the children
-
map<string,int> mergeGcounts(int);
+
+ void addNamesToCounts();
void randomTopology();
void randomBlengths();
void randomLabels(vector<string>);
//not included in the tree.
//only takes names from the first tree in the tree file and assumes that all trees use the same names.
int readTreeString(ifstream&);
+
};
#endif