]> git.donarmstrong.com Git - mothur.git/blob - phylosummary.cpp
added name parameter to phylotype command
[mothur.git] / phylosummary.cpp
1 /*
2  *  rawTrainingDataMaker.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/21/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylosummary.h"
11
12 /**************************************************************************************************/
13
14 PhyloSummary::PhyloSummary(string refTfile, string groupFile){
15         try {
16                 m = MothurOut::getInstance();
17                 maxLevel = 0;
18                 
19                 if (groupFile != "") {
20                         groupmap = new GroupMap(groupFile);
21                         groupmap->readMap();
22                 }else{
23                         groupmap = NULL;
24                 }
25                                 
26                 //check for necessary files
27                 string taxFileNameTest = refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum";
28                 ifstream FileTest(taxFileNameTest.c_str());
29                 
30                 if (!FileTest) { 
31                         m->mothurOut("Error: can't find " + taxFileNameTest + "."); m->mothurOutEndLine(); exit(1);
32                 }else{
33                         readTreeStruct(FileTest);
34                 }
35                 
36                 tree[0].rank = "0";
37                 assignRank(0);
38
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "PhyloSummary", "PhyloSummary");
42                 exit(1);
43         }
44 }
45 /**************************************************************************************************/
46
47 void PhyloSummary::summarize(string userTfile){
48         try {
49                 
50                 ifstream in;
51                 openInputFile(userTfile, in);
52                 
53                 //read in users taxonomy file and add sequences to tree
54                 string name, tax;
55                 while(!in.eof()){
56                         in >> name >> tax; gobble(in);
57                         
58                         addSeqToTree(name, tax);
59                         
60                         if (m->control_pressed) { break;  }
61                 }
62                 in.close();
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "PhyloSummary", "summarize");
66                 exit(1);
67         }
68 }
69
70 /**************************************************************************************************/
71
72 string PhyloSummary::getNextTaxon(string& heirarchy){
73         try {
74                 string currentLevel = "";
75                 if(heirarchy != ""){
76                         int pos = heirarchy.find_first_of(';');
77                         currentLevel=heirarchy.substr(0,pos);
78                         if (pos != (heirarchy.length()-1)) {  heirarchy=heirarchy.substr(pos+1);  }
79                         else { heirarchy = ""; }
80                 }
81                 return currentLevel;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "PhyloSummary", "getNextTaxon");
85                 exit(1);
86         }
87 }
88
89 /**************************************************************************************************/
90
91 int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
92         try {
93                 numSeqs++;
94                 
95                 map<string, int>::iterator childPointer;
96                 
97                 int currentNode = 0;
98                 string taxon;
99                 
100                 int level = 0;
101                 
102                 while (seqTaxonomy != "") {
103                         
104                         if (m->control_pressed) { return 0; }
105                         
106                         //somehow the parent is getting one too many accnos
107                         //use print to reassign the taxa id
108                         taxon = getNextTaxon(seqTaxonomy);
109                         
110                         childPointer = tree[currentNode].children.find(taxon);
111                         
112                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, update count and move on
113                                 if (groupmap != NULL) {
114                                         //find out the sequences group
115                                         string group = groupmap->getGroup(seqName);
116                                         
117                                         //do you have a count for this group?
118                                         map<string, int>::iterator itGroup = tree[currentNode].groupCount.find(group);
119                                         
120                                         //if yes, increment it - there should not be a case where we can't find it since we load group in read
121                                         if (itGroup != tree[currentNode].groupCount.end()) {
122                                                 tree[currentNode].groupCount[group]++;
123                                         }
124                                 }
125                                 
126                                 tree[currentNode].total++;
127
128                                 currentNode = childPointer->second;
129                         }else{  //otherwise, error
130                                 m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
131                                 seqTaxonomy = "";
132                         }
133                         
134                         level++;
135                         
136                         if ((seqTaxonomy == "") && (level < maxLevel)) {  //if you think you are done and you are not.
137                                 for (int k = level; k < maxLevel; k++) {  seqTaxonomy += "unclassified;";   }
138                         }
139                 }
140
141         }
142         catch(exception& e) {
143                 m->errorOut(e, "PhyloSummary", "addSeqToTree");
144                 exit(1);
145         }
146 }
147 /**************************************************************************************************/
148
149 void PhyloSummary::assignRank(int index){
150         try {
151                 map<string,int>::iterator it;
152                 int counter = 1;
153                 
154                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
155                         tree[it->second].rank = tree[index].rank + '.' + toString(counter);
156                         counter++;
157                                                                         
158                         assignRank(it->second);
159                 }
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "PhyloSummary", "assignRank");
163                 exit(1);
164         }
165 }
166 /**************************************************************************************************/
167
168 void PhyloSummary::print(ofstream& out){
169         try {
170                 //print labels
171                 out << "taxlevel\t rank ID\t label\t daughterlevels\t total\t";
172                 if (groupmap != NULL) {
173                         for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
174                                 out << groupmap->namesOfGroups[i] << '\t';
175                         }
176                 }
177                 
178                 out << endl;
179                 
180                 //print root
181                 out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << tree[0].children.size() << "\t" << tree[0].total << "\t";
182                 
183                 map<string, int>::iterator itGroup;
184                 if (groupmap != NULL) {
185                         for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
186                                 out << itGroup->second << '\t';
187                         }
188                 }
189                 out << endl;
190                 
191                 //print rest
192                 print(0, out);
193                 
194         }
195         catch(exception& e) {
196                 m->errorOut(e, "PhyloSummary", "print");
197                 exit(1);
198         }
199 }
200
201 /**************************************************************************************************/
202
203 void PhyloSummary::print(int i, ofstream& out){
204         try {
205                 map<string,int>::iterator it;
206                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
207                         
208                         if (tree[it->second].total != 0)  {
209                                 out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << tree[it->second].children.size() << "\t" << tree[it->second].total << "\t";
210                                 
211                                 map<string, int>::iterator itGroup;
212                                 if (groupmap != NULL) {
213                                         for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
214                                                 out << itGroup->second << '\t';
215                                         }
216                                 }
217                                 out << endl;
218                         }
219                         
220                         print(it->second, out);
221                 }
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "PhyloSummary", "print");
225                 exit(1);
226         }
227 }
228 /**************************************************************************************************/
229 void PhyloSummary::readTreeStruct(ifstream& in){
230         try {
231                 int num;
232                 
233                 in >> num; gobble(in);
234                 
235                 tree.resize(num);
236         
237                 //read the tree file
238                 for (int i = 0; i < tree.size(); i++) {
239         
240                         in >> tree[i].level >> tree[i].name >> num; //num contains the number of children tree[i] has
241                         
242                         //set children
243                         string childName;
244                         int childIndex;
245                         for (int j = 0; j < num; j++) {
246                                 in >> childName >> childIndex;
247                                 tree[i].children[childName] = childIndex;
248                         }
249                         
250                         //initialize groupcounts
251                         if (groupmap != NULL) {
252                                 for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
253                                         tree[i].groupCount[groupmap->namesOfGroups[j]] = 0;
254                                 }
255                         }
256                         
257                         tree[i].total = 0;
258                         
259                         gobble(in);
260                         
261                         if (tree[i].level > maxLevel) {  maxLevel = tree[i].level;  }
262                 }
263
264         }
265         catch(exception& e) {
266                 m->errorOut(e, "PhyloSummary", "print");
267                 exit(1);
268         }
269 }
270
271 /**************************************************************************************************/
272
273
274