]> git.donarmstrong.com Git - mothur.git/blob - treemap.cpp
changed random forest output filename
[mothur.git] / treemap.cpp
1 /*
2  *  treemap.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 1/26/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "treemap.h"
11
12 /************************************************************/
13
14  TreeMap::TreeMap(string filename) {
15         m = MothurOut::getInstance();
16     ofstream out2;
17     m->openOutputFileAppend(filename, out2);
18     out2 << endl; out2.close();
19         groupFileName = filename;
20         m->openInputFile(filename, fileHandle);
21 }
22
23 /************************************************************/
24  TreeMap::~TreeMap(){}
25 /************************************************************/
26 int TreeMap::readMap(string gf) {
27     try {
28         ofstream out2;
29         m->openOutputFileAppend(gf, out2);
30         out2 << endl; out2.close();
31         
32         groupFileName = gf;
33         m->openInputFile(gf, fileHandle);
34         
35         string seqName, seqGroup;
36         int error = 0;
37
38         string rest = "";
39         char buffer[4096];
40         bool pairDone = false;
41         bool columnOne = true;
42         
43         while (!fileHandle.eof()) {
44             if (m->control_pressed) { fileHandle.close();  return 1; }
45             
46             fileHandle.read(buffer, 4096);
47             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
48             
49             for (int i = 0; i < pieces.size(); i++) {
50                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
51                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
52                 
53                 if (pairDone) { 
54                     setNamesOfGroups(seqGroup);
55                     
56                     map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
57                     if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
58                     else {
59                         namesOfSeqs.push_back(seqName);
60                         treemap[seqName].groupname = seqGroup;  //store data in map
61                         
62                         it2 = seqsPerGroup.find(seqGroup);
63                         if (it2 == seqsPerGroup.end()) { //if it's a new group
64                             seqsPerGroup[seqGroup] = 1;
65                         }else {//it's a group we already have
66                             seqsPerGroup[seqGroup]++;
67                         }                               
68                     }
69                     pairDone = false; 
70                 } 
71             }
72         }
73         fileHandle.close();
74         
75         if (rest != "") {
76             vector<string> pieces = m->splitWhiteSpace(rest);
77             
78             for (int i = 0; i < pieces.size(); i++) {
79                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
80                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
81                 
82                 if (pairDone) { 
83                     setNamesOfGroups(seqGroup);
84                     
85                     map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
86                     if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
87                     else {
88                         namesOfSeqs.push_back(seqName);
89                         treemap[seqName].groupname = seqGroup;  //store data in map
90                         
91                         it2 = seqsPerGroup.find(seqGroup);
92                         if (it2 == seqsPerGroup.end()) { //if it's a new group
93                             seqsPerGroup[seqGroup] = 1;
94                         }else {//it's a group we already have
95                             seqsPerGroup[seqGroup]++;
96                         }                               
97                     }
98                     pairDone = false; 
99                 } 
100             }
101         }
102         
103         return error;
104     }
105         catch(exception& e) {
106                 m->errorOut(e, "TreeMap", "readMap");
107                 exit(1);
108         }
109 }
110
111 /************************************************************/
112 int TreeMap::readMap() {
113     try {
114         string seqName, seqGroup;
115         int error = 0;
116         
117         string rest = "";
118         char buffer[4096];
119         bool pairDone = false;
120         bool columnOne = true;
121         
122         while (!fileHandle.eof()) {
123             if (m->control_pressed) { fileHandle.close();  return 1; }
124             
125             fileHandle.read(buffer, 4096);
126             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
127             
128             for (int i = 0; i < pieces.size(); i++) {
129                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
130                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
131                 
132                 if (pairDone) { 
133                     setNamesOfGroups(seqGroup);
134                     
135                     map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
136                     if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
137                     else {
138                         namesOfSeqs.push_back(seqName);
139                         treemap[seqName].groupname = seqGroup;  //store data in map
140                         
141                         it2 = seqsPerGroup.find(seqGroup);
142                         if (it2 == seqsPerGroup.end()) { //if it's a new group
143                             seqsPerGroup[seqGroup] = 1;
144                         }else {//it's a group we already have
145                             seqsPerGroup[seqGroup]++;
146                         }                               
147                     }
148                     pairDone = false; 
149                 } 
150             }
151         }
152         fileHandle.close();
153         
154         if (rest != "") {
155             vector<string> pieces = m->splitWhiteSpace(rest);
156             
157             for (int i = 0; i < pieces.size(); i++) {
158                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
159                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
160                 
161                 if (pairDone) { 
162                     setNamesOfGroups(seqGroup);
163                     
164                     map<string, GroupIndex>::iterator itCheck = treemap.find(seqName);
165                     if (itCheck != treemap.end()) { error = 1; m->mothurOut("[WARNING]: Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
166                     else {
167                         namesOfSeqs.push_back(seqName);
168                         treemap[seqName].groupname = seqGroup;  //store data in map
169                         
170                         it2 = seqsPerGroup.find(seqGroup);
171                         if (it2 == seqsPerGroup.end()) { //if it's a new group
172                             seqsPerGroup[seqGroup] = 1;
173                         }else {//it's a group we already have
174                             seqsPerGroup[seqGroup]++;
175                         }                               
176                     }
177                     pairDone = false; 
178                 } 
179             }
180         }
181         
182         return error;
183     }
184         catch(exception& e) {
185                 m->errorOut(e, "TreeMap", "readMap");
186                 exit(1);
187         }
188 }
189 /************************************************************/
190 void TreeMap::addSeq(string seqName, string seqGroup) {
191         
192                 namesOfSeqs.push_back(seqName);
193                 setNamesOfGroups(seqGroup);
194                                         
195                 treemap[seqName].groupname = seqGroup;  //store data in map
196                         
197                 it2 = seqsPerGroup.find(seqGroup);
198                 if (it2 == seqsPerGroup.end()) { //if it's a new group
199                         seqsPerGroup[seqGroup] = 1;
200                 }else {//it's a group we already have
201                         seqsPerGroup[seqGroup]++;
202                 }
203 }
204 /************************************************************/
205 void TreeMap::removeSeq(string seqName) {
206         
207         //erase name from namesOfSeqs
208         for (int i = 0; i < namesOfSeqs.size(); i++) {
209                 if (namesOfSeqs[i] == seqName)  {
210                         namesOfSeqs.erase(namesOfSeqs.begin()+i);
211                         break;
212                 }
213         }
214         
215         //decrement sequences in this group
216         string group = treemap[seqName].groupname;
217         seqsPerGroup[group]--;
218         
219         //remove seq from treemap
220         it = treemap.find(seqName);
221         treemap.erase(it);
222 }
223 /************************************************************/
224
225 int TreeMap::getNumGroups() {
226                         
227         return namesOfGroups.size();    
228                 
229 }
230 /************************************************************/
231
232 int TreeMap::getNumSeqs() {
233                         
234         return namesOfSeqs.size();      
235                 
236 }
237
238 /************************************************************/
239
240 string TreeMap::getGroup(string sequenceName) {
241                         
242         it = treemap.find(sequenceName);
243         if (it != treemap.end()) { //sequence name was in group file
244                 return it->second.groupname;    
245         }else {
246                 return "not found";
247         }
248                 
249 }
250 /************************************************************/
251
252 void TreeMap::setNamesOfGroups(string seqGroup) {
253                         int i, count;
254                         count = 0;
255                         for (i=0; i<namesOfGroups.size(); i++) {
256                                 if (namesOfGroups[i] != seqGroup) {
257                                         count++; //you have not found this group
258                                 }else {
259                                         break; //you already have it
260                                 }
261                         }
262                         if (count == namesOfGroups.size()) {
263                                 namesOfGroups.push_back(seqGroup); //new group
264                         }
265 }
266 /************************************************************/
267 bool TreeMap::isValidGroup(string groupname) {
268         try {
269                 for (int i = 0; i < namesOfGroups.size(); i++) {
270                         if (groupname == namesOfGroups[i]) { return true; }
271                 }
272                 
273                 return false;
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "TreeMap", "isValidGroup");
277                 exit(1);
278         }
279 }
280 /***********************************************************************/
281
282 void TreeMap::print(ostream& output){
283         try {
284                 
285                 for(it = treemap.begin(); it != treemap.end(); it++){
286                         output << it->first << '\t' << it->second.groupname << '\t' << it->second.vectorIndex << endl;
287                 }
288         }
289         catch(exception& e) {
290                 m->errorOut(e, "TreeMap", "print");
291                 exit(1);
292         }
293 }
294
295 /************************************************************/
296 void TreeMap::makeSim(vector<string> ThisnamesOfGroups) {
297         try {
298                 //set names of groups
299                 namesOfGroups = ThisnamesOfGroups;
300                 
301                 //set names of seqs to names of groups
302                 namesOfSeqs = ThisnamesOfGroups;
303                 
304                 // make map where key and value are both the group name since that what the tree.shared command wants
305                 for (int i = 0; i < namesOfGroups.size(); i++) {
306                         treemap[namesOfGroups[i]].groupname = namesOfGroups[i];
307                         seqsPerGroup[namesOfGroups[i]] = 1;
308                 }
309                 
310                 numGroups = namesOfGroups.size();
311                 
312         }
313         catch(exception& e) {
314                 m->errorOut(e, "TreeMap", "makeSim");
315                 exit(1);
316         }
317 }
318 /************************************************************/
319 void TreeMap::makeSim(ListVector* list) {
320         try {
321                 //set names of groups
322                 namesOfGroups.clear();
323                 for(int i = 0; i < list->size(); i++) {
324                         namesOfGroups.push_back(list->get(i));
325                 }
326                 
327                 //set names of seqs to names of groups
328                 namesOfSeqs = namesOfGroups;
329                 
330                 // make map where key and value are both the group name since that what the tree.shared command wants
331                 for (int i = 0; i < namesOfGroups.size(); i++) {
332                         treemap[namesOfGroups[i]].groupname = namesOfGroups[i];
333                         seqsPerGroup[namesOfGroups[i]] = 1;
334                 }
335                 
336                 numGroups = namesOfGroups.size();
337                 
338         }
339         catch(exception& e) {
340                 m->errorOut(e, "TreeMap", "makeSim");
341                 exit(1);
342         }
343 }
344 /************************************************************/
345 int TreeMap::getCopy(TreeMap& copy){
346         try {
347          
348         namesOfGroups = copy.getNamesOfGroups();
349                 numGroups = copy.getNumGroups();
350         namesOfSeqs = copy.namesOfSeqs;
351         seqsPerGroup = copy.seqsPerGroup;
352         treemap = copy.treemap;
353         
354         return 0;
355         }
356         catch(exception& e) {
357                 m->errorOut(e, "TreeMap", "getCopy");
358                 exit(1);
359         }
360 }
361 /************************************************************/
362 vector<string> TreeMap::getNamesSeqs(){
363         try {
364         
365                 vector<string> names;
366                 
367         for(it = treemap.begin(); it != treemap.end(); it++){
368             names.push_back(it->first);
369                 }
370                 
371                 return names;
372         }
373         catch(exception& e) {
374                 m->errorOut(e, "TreeMap", "getNamesSeqs");
375                 exit(1);
376         }
377 }
378 /************************************************************/
379 vector<string> TreeMap::getNamesSeqs(vector<string> picked){
380         try {
381                 
382                 vector<string> names;
383                 
384                 for(it = treemap.begin(); it != treemap.end(); it++){
385                         //if you are belong to one the the groups in the picked vector add you
386                         if (m->inUsersGroups(it->second.groupname, picked)) {
387                                 names.push_back(it->first);
388                         }
389                 }
390                 
391                 return names;
392         }
393         catch(exception& e) {
394                 m->errorOut(e, "TreeMap", "getNamesSeqs");
395                 exit(1);
396         }
397 }
398
399 /************************************************************/
400