]> git.donarmstrong.com Git - mothur.git/blob - doTaxonomy.cpp
06337b446bf0d79ffad7b29f4fb904c99f54058e
[mothur.git] / doTaxonomy.cpp
1 /*
2  *  doTaxonomy.cpp
3  *  
4  *
5  *  Created by Pat Schloss on 6/17/09.
6  *  Copyright 2009 Patrick D. Schloss. All rights reserved.
7  *
8  */
9
10 #include "doTaxonomy.h"
11
12 /**************************************************************************************************/
13
14 PhyloTree::PhyloTree(){
15         
16         numNodes = 1;
17         numSeqs = 0;
18         tree.push_back(TaxNode("Root"));
19         tree[0].heirarchyID = "0";
20 }
21
22 /**************************************************************************************************/
23
24 string PhyloTree::getNextTaxon(string& heirarchy){
25         
26         string currentLevel = "";
27         if(heirarchy != ""){
28                 currentLevel=heirarchy.substr(0,heirarchy.find_first_of(';'));
29                 heirarchy=heirarchy.substr(heirarchy.find_first_of(';')+1);
30         }
31         return currentLevel;
32         
33 }
34
35 /**************************************************************************************************/
36
37 void PhyloTree::addSeqToTree(string seqName, string seqTaxonomy){
38
39         numSeqs++;
40
41         map<string, int>::iterator childPointer;
42
43         int currentNode = 0;
44         int level = 1;
45         
46         tree[0].accessions.push_back(seqName);
47         string taxon;// = getNextTaxon(seqTaxonomy);
48
49         while(seqTaxonomy != ""){
50
51                 level++;
52
53 //somehow the parent is getting one too many accnos
54 //use print to reassign the taxa id
55                 taxon = getNextTaxon(seqTaxonomy);
56
57                 childPointer = tree[currentNode].children.find(taxon);
58
59                 if(childPointer != tree[currentNode].children.end()){   //if the node already exists, move on
60                         currentNode = childPointer->second;
61                         tree[currentNode].accessions.push_back(seqName);
62                         name2Taxonomy[seqName] = currentNode;
63                 }
64                 else{                                                                                   //otherwise, create it
65                         tree.push_back(TaxNode(taxon));
66                         numNodes++;
67                         tree[currentNode].children[taxon] = numNodes-1;
68                         tree[numNodes-1].parent = currentNode;
69
70 //                      int numChildren = tree[currentNode].children.size();
71 //                      string heirarchyID = tree[currentNode].heirarchyID;
72 //                      tree[currentNode].accessions.push_back(seqName);
73                         
74                         currentNode = tree[currentNode].children[taxon];
75                         tree[currentNode].accessions.push_back(seqName);
76                         name2Taxonomy[seqName] = currentNode;
77 //                      tree[currentNode].level = level;
78 //                      tree[currentNode].childNumber = numChildren;
79 //                      tree[currentNode].heirarchyID = heirarchyID + '.' + toString(tree[currentNode].childNumber);
80                 }
81                 
82                 if (seqTaxonomy == "") {   uniqueTaxonomies[currentNode] = currentNode; }
83         }
84 }
85 /**************************************************************************************************/
86 vector<int> PhyloTree::getGenusNodes()  {
87         try {
88                 genusIndex.clear();
89                 //generate genusIndexes
90                 map<int, int>::iterator it2;
91                 for (it2=uniqueTaxonomies.begin(); it2!=uniqueTaxonomies.end(); it2++) {  genusIndex.push_back(it2->first);     }
92                 
93                 return genusIndex;
94         }
95         catch(exception& e) {
96                 errorOut(e, "PhyloTree", "getGenusNodes");
97                 exit(1);
98         }
99 }
100 /**************************************************************************************************/
101
102 void PhyloTree::assignHeirarchyIDs(int index){
103         
104         map<string,int>::iterator it;
105         int counter = 1;
106         
107         for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
108                 tree[it->second].heirarchyID = tree[index].heirarchyID + '.' + toString(counter);
109                 counter++;
110                 tree[it->second].level = tree[index].level + 1;
111                 assignHeirarchyIDs(it->second);
112         }
113 }
114
115 /**************************************************************************************************/
116
117 void PhyloTree::print(ofstream& out){
118         
119         out << tree[0].level << '\t'<< tree[0].heirarchyID << '\t' << tree[0].name << '\t' << tree[0].children.size() << '\t' << tree[0].accessions.size() << endl;
120         print(0, out);
121
122         
123 }
124
125 /**************************************************************************************************/
126
127 void PhyloTree::print(int i, ofstream& out){
128         
129         map<string,int>::iterator it;
130         for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
131                 out <<tree[it->second].level << '\t' << tree[it->second].heirarchyID << '\t' << tree[it->second].name << '\t' << tree[it->second].children.size() << '\t' << tree[it->second].accessions.size() << endl;
132                 print(it->second, out);
133         }
134
135 }
136
137 /**************************************************************************************************/
138
139
140