]> git.donarmstrong.com Git - mothur.git/blob - treereader.cpp
changed random forest output filename
[mothur.git] / treereader.cpp
1 //
2 //  treereader.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/11/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "treereader.h"
10 #include "readtree.h"
11 #include "groupmap.h"
12
13 /***********************************************************************/
14 TreeReader::TreeReader(string tf, string cf) : treefile(tf), countfile(cf)  { 
15     try {
16         m = MothurOut::getInstance();
17         ct = new CountTable();
18         ct->readTable(cf, true);
19         
20         //if no groupinfo in count file we need to add it
21         if (!ct->hasGroupInfo()) {
22             ct->addGroup("Group1");
23             vector<string> namesOfSeqs = ct->getNamesOfSeqs();
24             for (int i = 0; i < namesOfSeqs.size(); i++) { 
25                 ct->setAbund(namesOfSeqs[i], "Group1", ct->getNumSeqs(namesOfSeqs[i]));
26             }
27         }
28         namefile = "";
29         groupfile = "";
30         readTrees();
31     }
32         catch(exception& e) {
33                 m->errorOut(e, "TreeReader", "TreeReader");
34                 exit(1);
35         }
36 }
37 /***********************************************************************/
38 TreeReader::TreeReader(string tf, string gf, string nf) : treefile(tf),  groupfile(gf), namefile(nf)  { 
39     try {
40         m = MothurOut::getInstance();
41         countfile = "";
42         ct = new CountTable();
43         if (namefile != "") { ct->createTable(namefile, groupfile, true); }
44         else {
45             Tree* tree = new Tree(treefile); delete tree;  //extracts names from tree to make faked out groupmap
46             set<string> nameMap;
47             map<string, string> groupMap;
48             set<string> gps;
49             for (int i = 0; i < m->Treenames.size(); i++) { nameMap.insert(m->Treenames[i]);  }
50             if (groupfile == "") { gps.insert("Group1"); for (int i = 0; i < m->Treenames.size(); i++) { groupMap[m->Treenames[i]] = "Group1"; } }
51             else {
52                 GroupMap g(groupfile); 
53                 g.readMap();
54                 vector<string> seqs = g.getNamesSeqs();
55                 for (int i = 0; i < seqs.size(); i++) {  
56                     string group = g.getGroup(seqs[i]);
57                     groupMap[seqs[i]] = group;
58                     gps.insert(group);
59                 }
60             }
61             ct->createTable(nameMap, groupMap, gps);
62         }
63
64         readTrees();
65     }
66         catch(exception& e) {
67                 m->errorOut(e, "TreeReader", "TreeReader");
68                 exit(1);
69         }
70 }
71 /***********************************************************************/
72 bool TreeReader::readTrees()  { 
73     try {
74         
75         int numUniquesInName = ct->getNumUniqueSeqs();
76                 //if (namefile != "") { numUniquesInName = readNamesFile(); }
77                 
78                 ReadTree* read = new ReadNewickTree(treefile);
79                 int readOk = read->read(ct); 
80                 
81                 if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine();  delete read; m->control_pressed=true; return 0; }
82                 
83                 read->AssembleTrees();
84                 trees = read->getTrees();
85                 delete read;
86         
87                 //make sure all files match
88                 //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size.
89                 int numNamesInTree;
90                 if (namefile != "")  {  
91                         if (numUniquesInName == m->Treenames.size()) {  numNamesInTree = ct->getNumSeqs();  }
92                         else {   numNamesInTree = m->Treenames.size();  }
93                 }else {  numNamesInTree = m->Treenames.size();  }
94                 
95                 
96                 //output any names that are in group file but not in tree
97                 if (numNamesInTree < ct->getNumSeqs()) {
98             vector<string> namesSeqsCt = ct->getNamesOfSeqs();
99                         for (int i = 0; i < namesSeqsCt.size(); i++) {
100                                 //is that name in the tree?
101                                 int count = 0;
102                                 for (int j = 0; j < m->Treenames.size(); j++) {
103                                         if (namesSeqsCt[i] == m->Treenames[j]) { break; } //found it
104                                         count++;
105                                 }
106                                 
107                                 if (m->control_pressed) { for (int i = 0; i < trees.size(); i++) { delete trees[i]; } return 0; }
108                                 
109                                 //then you did not find it so report it 
110                                 if (count == m->Treenames.size()) { 
111                     m->mothurOut(namesSeqsCt[i] + " is in your name or group file and not in your tree. It will be disregarded."); m->mothurOutEndLine();
112                     ct->remove(namesSeqsCt[i]);
113                                 }
114                         }
115                 }
116         
117         return true;
118     }
119         catch(exception& e) {
120                 m->errorOut(e, "TreeReader", "readTrees");
121                 exit(1);
122         }
123 }
124 /***********************************************************************/
125
126