]> git.donarmstrong.com Git - mothur.git/blob - phylosummary.cpp
started work on sffinfo command. fixed bug across all paralellized commands if the...
[mothur.git] / phylosummary.cpp
1 /*
2  *  rawTrainingDataMaker.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 4/21/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "phylosummary.h"
11
12 /**************************************************************************************************/
13
14 PhyloSummary::PhyloSummary(string refTfile, string groupFile){
15         try {
16                 m = MothurOut::getInstance();
17                 maxLevel = 0;
18                 
19                 if (groupFile != "") {
20                         groupmap = new GroupMap(groupFile);
21                         groupmap->readMap();
22                 }else{
23                         groupmap = NULL;
24                 }
25                                 
26                 //check for necessary files
27                 string taxFileNameTest = refTfile.substr(0,refTfile.find_last_of(".")+1) + "tree.sum";
28                 ifstream FileTest(taxFileNameTest.c_str());
29                 
30                 if (!FileTest) { 
31                         m->mothurOut("Error: can't find " + taxFileNameTest + "."); m->mothurOutEndLine(); exit(1);
32                 }else{
33                         readTreeStruct(FileTest);
34                 }
35                 
36                 tree[0].rank = "0";
37                 assignRank(0);
38
39         }
40         catch(exception& e) {
41                 m->errorOut(e, "PhyloSummary", "PhyloSummary");
42                 exit(1);
43         }
44 }
45 /**************************************************************************************************/
46
47 void PhyloSummary::summarize(string userTfile){
48         try {
49                 
50                 ifstream in;
51                 openInputFile(userTfile, in);
52                 
53                 //read in users taxonomy file and add sequences to tree
54                 string name, tax;
55                 while(!in.eof()){
56                         in >> name >> tax; gobble(in);
57                         
58                         addSeqToTree(name, tax);
59                         
60                         if (m->control_pressed) { break;  }
61                 }
62                 in.close();
63         }
64         catch(exception& e) {
65                 m->errorOut(e, "PhyloSummary", "summarize");
66                 exit(1);
67         }
68 }
69
70 /**************************************************************************************************/
71
72 string PhyloSummary::getNextTaxon(string& heirarchy){
73         try {
74                 string currentLevel = "";
75                 if(heirarchy != ""){
76                         int pos = heirarchy.find_first_of(';');
77                         currentLevel=heirarchy.substr(0,pos);
78                         if (pos != (heirarchy.length()-1)) {  heirarchy=heirarchy.substr(pos+1);  }
79                         else { heirarchy = ""; }
80                 }
81                 return currentLevel;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "PhyloSummary", "getNextTaxon");
85                 exit(1);
86         }
87 }
88
89 /**************************************************************************************************/
90
91 int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){
92         try {
93                 numSeqs++;
94                 
95                 map<string, int>::iterator childPointer;
96                 
97                 int currentNode = 0;
98                 string taxon;
99                 
100                 int level = 0;
101                 
102                 while (seqTaxonomy != "") {
103                         
104                         if (m->control_pressed) { return 0; }
105                         
106                         //somehow the parent is getting one too many accnos
107                         //use print to reassign the taxa id
108                         taxon = getNextTaxon(seqTaxonomy);
109                         
110                         childPointer = tree[currentNode].children.find(taxon);
111                         
112                         if(childPointer != tree[currentNode].children.end()){   //if the node already exists, update count and move on
113                                 if (groupmap != NULL) {
114                                         //find out the sequences group
115                                         string group = groupmap->getGroup(seqName);
116                                         
117                                         if (group == "not found") {  m->mothurOut(seqName + " is not in your groupfile, and will be included in the overall total, but not any group total."); m->mothurOutEndLine();  }
118                                         
119                                         //do you have a count for this group?
120                                         map<string, int>::iterator itGroup = tree[currentNode].groupCount.find(group);
121                                         
122                                         //if yes, increment it - there should not be a case where we can't find it since we load group in read
123                                         if (itGroup != tree[currentNode].groupCount.end()) {
124                                                 tree[currentNode].groupCount[group]++;
125                                         }
126                                 }
127                                 
128                                 tree[currentNode].total++;
129
130                                 currentNode = childPointer->second;
131                         }else{  //otherwise, error
132                                 m->mothurOut("Warning: cannot find taxon " + taxon + " in reference taxonomy tree at level " + toString(tree[currentNode].level) + " for " + seqName + ". This may cause totals of daughter levels not to add up in summary file."); m->mothurOutEndLine();
133                                 break;
134                         }
135                         
136                         level++;
137                         
138                         if ((seqTaxonomy == "") && (level < maxLevel)) {  //if you think you are done and you are not.
139                                 for (int k = level; k < maxLevel; k++) {  seqTaxonomy += "unclassified;";   }
140                         }
141                 }
142
143         }
144         catch(exception& e) {
145                 m->errorOut(e, "PhyloSummary", "addSeqToTree");
146                 exit(1);
147         }
148 }
149 /**************************************************************************************************/
150
151 void PhyloSummary::assignRank(int index){
152         try {
153                 map<string,int>::iterator it;
154                 int counter = 1;
155                 
156                 for(it=tree[index].children.begin();it!=tree[index].children.end();it++){
157                         tree[it->second].rank = tree[index].rank + '.' + toString(counter);
158                         counter++;
159                                                                         
160                         assignRank(it->second);
161                 }
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "PhyloSummary", "assignRank");
165                 exit(1);
166         }
167 }
168 /**************************************************************************************************/
169
170 void PhyloSummary::print(ofstream& out){
171         try {
172                 //print labels
173                 out << "taxlevel\t rankID\t taxon\t daughterlevels\t total\t";
174                 if (groupmap != NULL) {
175                         //so the labels match the counts below, since the map sorts them automatically...
176                         sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end());
177                         
178                         for (int i = 0; i < groupmap->namesOfGroups.size(); i++) {
179                                 out << groupmap->namesOfGroups[i] << '\t';
180                         }
181                 }
182                 
183                 out << endl;
184                 
185                 int totalChildrenInTree = 0;
186                 
187                 map<string,int>::iterator it;
188                 for(it=tree[0].children.begin();it!=tree[0].children.end();it++){   
189                         if (tree[it->second].total != 0)  {   totalChildrenInTree++; }
190                 }
191                 
192                 //print root
193                 out << tree[0].level << "\t" << tree[0].rank << "\t" << tree[0].name << "\t" << totalChildrenInTree << "\t" << tree[0].total << "\t";
194                 
195                 map<string, int>::iterator itGroup;
196                 if (groupmap != NULL) {
197                         for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) {
198                                 out << itGroup->second << '\t';
199                         }
200                 }
201                 out << endl;
202                 
203                 //print rest
204                 print(0, out);
205                 
206         }
207         catch(exception& e) {
208                 m->errorOut(e, "PhyloSummary", "print");
209                 exit(1);
210         }
211 }
212
213 /**************************************************************************************************/
214
215 void PhyloSummary::print(int i, ofstream& out){
216         try {
217                 map<string,int>::iterator it;
218                 for(it=tree[i].children.begin();it!=tree[i].children.end();it++){
219                         
220                         if (tree[it->second].total != 0)  {
221                         
222                                 int totalChildrenInTree = 0;
223                 
224                                 map<string,int>::iterator it2;
225                                 for(it2=tree[it->second].children.begin();it2!=tree[it->second].children.end();it2++){   
226                                         if (tree[it2->second].total != 0)  {   totalChildrenInTree++; }
227                                 }
228                         
229                                 out << tree[it->second].level << "\t" << tree[it->second].rank << "\t" << tree[it->second].name << "\t" << totalChildrenInTree << "\t" << tree[it->second].total << "\t";
230                                 
231                                 map<string, int>::iterator itGroup;
232                                 if (groupmap != NULL) {
233                                         for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) {
234                                                 out << itGroup->second << '\t';
235                                         }
236                                 }
237                                 out << endl;
238                         }
239                         
240                         print(it->second, out);
241                 }
242         }
243         catch(exception& e) {
244                 m->errorOut(e, "PhyloSummary", "print");
245                 exit(1);
246         }
247 }
248 /**************************************************************************************************/
249 void PhyloSummary::readTreeStruct(ifstream& in){
250         try {
251                 int num;
252                 
253                 in >> num; gobble(in);
254                 
255                 tree.resize(num);
256         
257                 //read the tree file
258                 for (int i = 0; i < tree.size(); i++) {
259         
260                         in >> tree[i].level >> tree[i].name >> num; //num contains the number of children tree[i] has
261                         
262                         //set children
263                         string childName;
264                         int childIndex;
265                         for (int j = 0; j < num; j++) {
266                                 in >> childName >> childIndex;
267                                 tree[i].children[childName] = childIndex;
268                         }
269                         
270                         //initialize groupcounts
271                         if (groupmap != NULL) {
272                                 for (int j = 0; j < groupmap->namesOfGroups.size(); j++) {
273                                         tree[i].groupCount[groupmap->namesOfGroups[j]] = 0;
274                                 }
275                         }
276                         
277                         tree[i].total = 0;
278                         
279                         gobble(in);
280                         
281                         if (tree[i].level > maxLevel) {  maxLevel = tree[i].level;  }
282                 }
283
284         }
285         catch(exception& e) {
286                 m->errorOut(e, "PhyloSummary", "print");
287                 exit(1);
288         }
289 }
290
291 /**************************************************************************************************/
292
293
294