]> git.donarmstrong.com Git - mothur.git/blob - groupmap.cpp
added rename.seqs command.
[mothur.git] / groupmap.cpp
1 /*
2  *  groupmap.cpp
3  *  Dotur
4  *
5  *  Created by Sarah Westcott on 12/1/08.
6  *  Copyright 2008 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "groupmap.h"
11
12 /************************************************************/
13
14  GroupMap::GroupMap(string filename) {
15         m = MothurOut::getInstance();
16         groupFileName = filename;
17         m->openInputFile(filename, fileHandle);
18         index = 0;
19 }
20
21 /************************************************************/
22  GroupMap::~GroupMap(){}
23 /************************************************************/
24 int GroupMap::readMap() {
25     try {
26         string seqName, seqGroup;
27                 int error = 0;
28         string rest = "";
29         char buffer[4096];
30         bool pairDone = false;
31         bool columnOne = true;
32     
33         while (!fileHandle.eof()) {
34             if (m->control_pressed) { fileHandle.close();  return 1; }
35         
36             fileHandle.read(buffer, 4096);
37             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
38         
39             for (int i = 0; i < pieces.size(); i++) {
40                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
41                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
42             
43                 if (pairDone) { 
44                     setNamesOfGroups(seqGroup);
45                     
46                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
47                     m->checkName(seqName);
48                     it = groupmap.find(seqName);
49                     
50                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
51                     else {
52                         groupmap[seqName] = seqGroup;   //store data in map
53                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
54                     }
55                     pairDone = false; 
56                 } 
57             }
58         }
59                 fileHandle.close();
60         
61         if (rest != "") {
62             vector<string> pieces = m->splitWhiteSpace(rest);
63             
64             for (int i = 0; i < pieces.size(); i++) {
65                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
66                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
67                 
68                 if (pairDone) { 
69                     setNamesOfGroups(seqGroup);
70                     
71                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
72                     m->checkName(seqName);
73                     it = groupmap.find(seqName);
74                     
75                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your groupfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
76                     else {
77                         groupmap[seqName] = seqGroup;   //store data in map
78                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
79                     }
80                     pairDone = false; 
81                 } 
82             }
83         }
84         
85                 m->setAllGroups(namesOfGroups);
86                 return error;
87     }
88         catch(exception& e) {
89                 m->errorOut(e, "GroupMap", "readMap");
90                 exit(1);
91         }
92 }
93 /************************************************************/
94 int GroupMap::readDesignMap() {
95     try {
96         string seqName, seqGroup;
97                 int error = 0;
98         string rest = "";
99         char buffer[4096];
100         bool pairDone = false;
101         bool columnOne = true;
102         
103         while (!fileHandle.eof()) {
104             if (m->control_pressed) { fileHandle.close();  return 1; }
105             
106             fileHandle.read(buffer, 4096);
107             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
108             
109             for (int i = 0; i < pieces.size(); i++) {
110                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
111                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
112                 
113                 if (pairDone) { 
114                     setNamesOfGroups(seqGroup);
115                     
116                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
117                     m->checkName(seqName);
118                     it = groupmap.find(seqName);
119                     
120                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
121                     else {
122                         groupmap[seqName] = seqGroup;   //store data in map
123                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
124                     }
125                     pairDone = false; 
126                 } 
127             }
128         }
129                 fileHandle.close();
130         
131         if (rest != "") {
132             vector<string> pieces = m->splitWhiteSpace(rest);
133             
134             for (int i = 0; i < pieces.size(); i++) {
135                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
136                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
137                 
138                 if (pairDone) { 
139                     setNamesOfGroups(seqGroup);
140                     
141                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
142                     m->checkName(seqName);
143                     it = groupmap.find(seqName);
144                     
145                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
146                     else {
147                         groupmap[seqName] = seqGroup;   //store data in map
148                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
149                     }
150                     pairDone = false; 
151                 } 
152             }
153
154         }
155         
156                 m->setAllGroups(namesOfGroups);
157                 return error;
158     }
159         catch(exception& e) {
160                 m->errorOut(e, "GroupMap", "readDesignMap");
161                 exit(1);
162         }
163 }
164 /************************************************************/
165 int GroupMap::readMap(string filename) {
166     try {
167         groupFileName = filename;
168         m->openInputFile(filename, fileHandle);
169         index = 0;
170         string seqName, seqGroup;
171                 int error = 0;
172         string rest = "";
173         char buffer[4096];
174         bool pairDone = false;
175         bool columnOne = true;
176         
177         while (!fileHandle.eof()) {
178             if (m->control_pressed) { fileHandle.close();  return 1; }
179             
180             fileHandle.read(buffer, 4096);
181             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
182             
183             for (int i = 0; i < pieces.size(); i++) {
184                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
185                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
186                 
187                 if (pairDone) { 
188                     setNamesOfGroups(seqGroup);
189                     
190                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
191                     m->checkName(seqName);
192                     it = groupmap.find(seqName);
193                     
194                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
195                     else {
196                         groupmap[seqName] = seqGroup;   //store data in map
197                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
198                     }
199                     pairDone = false; 
200                 } 
201             }
202         }
203                 fileHandle.close();
204         
205         if (rest != "") {
206             vector<string> pieces = m->splitWhiteSpace(rest);
207             
208             for (int i = 0; i < pieces.size(); i++) {
209                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
210                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
211                 
212                 if (pairDone) { 
213                     setNamesOfGroups(seqGroup);
214                     
215                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
216                     m->checkName(seqName);
217                     it = groupmap.find(seqName);
218                     
219                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your group file contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
220                     else {
221                         groupmap[seqName] = seqGroup;   //store data in map
222                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
223                     }
224                     pairDone = false; 
225                 } 
226             }
227         }
228         
229                 m->setAllGroups(namesOfGroups);
230                 return error;
231     }
232         catch(exception& e) {
233                 m->errorOut(e, "GroupMap", "readMap");
234                 exit(1);
235         }
236 }
237 /************************************************************/
238 int GroupMap::readDesignMap(string filename) {
239     try {
240         groupFileName = filename;
241         m->openInputFile(filename, fileHandle);
242         index = 0;
243         string seqName, seqGroup;
244                 int error = 0;
245         string rest = "";
246         char buffer[4096];
247         bool pairDone = false;
248         bool columnOne = true;
249         
250         while (!fileHandle.eof()) {
251             if (m->control_pressed) { fileHandle.close();  return 1; }
252             
253             fileHandle.read(buffer, 4096);
254             vector<string> pieces = m->splitWhiteSpace(rest, buffer, fileHandle.gcount());
255             
256             for (int i = 0; i < pieces.size(); i++) {
257                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
258                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
259                 
260                 if (pairDone) { 
261                     setNamesOfGroups(seqGroup);
262                     
263                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
264                     m->checkName(seqName);
265                     it = groupmap.find(seqName);
266                     
267                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
268                     else {
269                         groupmap[seqName] = seqGroup;   //store data in map
270                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
271                     }
272                     pairDone = false; 
273                 } 
274             }
275         }
276                 fileHandle.close();
277         
278         if (rest != "") {
279             vector<string> pieces = m->splitWhiteSpace(rest);
280             
281             for (int i = 0; i < pieces.size(); i++) {
282                 if (columnOne) {  seqName = pieces[i]; columnOne=false; }
283                 else  { seqGroup = pieces[i]; pairDone = true; columnOne=true; }
284                 
285                 if (pairDone) { 
286                     setNamesOfGroups(seqGroup);
287                     
288                     if (m->debug) { m->mothurOut("[DEBUG]: name = '" + seqName + "', group = '" + seqGroup + "'\n"); }
289                     m->checkName(seqName);
290                     it = groupmap.find(seqName);
291                     
292                     if (it != groupmap.end()) { error = 1; m->mothurOut("Your designfile contains more than 1 sequence named " + seqName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
293                     else {
294                         groupmap[seqName] = seqGroup;   //store data in map
295                         seqsPerGroup[seqGroup]++;  //increment number of seqs in that group
296                     }
297                     pairDone = false; 
298                 } 
299             }
300         }
301         
302                 m->setAllGroups(namesOfGroups);
303                 return error;
304     }
305         catch(exception& e) {
306                 m->errorOut(e, "GroupMap", "readDesignMap");
307                 exit(1);
308         }
309 }
310 /************************************************************/
311 int GroupMap::getNumGroups() { return namesOfGroups.size();     }
312 /************************************************************/
313
314 string GroupMap::getGroup(string sequenceName) {
315                         
316         it = groupmap.find(sequenceName);
317         if (it != groupmap.end()) { //sequence name was in group file
318                 return it->second;      
319         }else {
320         //look for it in names of groups to see if the user accidently used the wrong file
321         if (m->inUsersGroups(sequenceName, namesOfGroups)) {
322             m->mothurOut("[WARNING]: Your group or design file contains a group named " + sequenceName + ".  Perhaps you are used a group file instead of a design file? A common cause of this is using a tree file that relates your groups (created by the tree.shared command) with a group file that assigns sequences to a group."); m->mothurOutEndLine(); 
323         }
324                 return "not found";
325         }
326 }
327
328 /************************************************************/
329
330 void GroupMap::setGroup(string sequenceName, string groupN) {
331         setNamesOfGroups(groupN);
332         m->checkName(sequenceName);
333         it = groupmap.find(sequenceName);
334         
335         if (it != groupmap.end()) {  m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine();  }
336         else {
337                 groupmap[sequenceName] = groupN;        //store data in map
338                 seqsPerGroup[groupN]++;  //increment number of seqs in that group
339         }
340 }
341
342 /************************************************************/
343 void GroupMap::setNamesOfGroups(string seqGroup) {
344         int i, count;
345         count = 0;
346         for (i=0; i<namesOfGroups.size(); i++) {
347                 if (namesOfGroups[i] != seqGroup) {
348                         count++; //you have not found this group
349                 }else {
350                         break; //you already have it
351                 }
352         }
353         if (count == namesOfGroups.size()) {
354                 namesOfGroups.push_back(seqGroup); //new group
355                 seqsPerGroup[seqGroup] = 0;
356                 groupIndex[seqGroup] = index;
357                 index++;
358         }
359 }
360 /************************************************************/
361 bool GroupMap::isValidGroup(string groupname) {
362         try {
363                 for (int i = 0; i < namesOfGroups.size(); i++) {
364                         if (groupname == namesOfGroups[i]) { return true; }
365                 }
366                 
367                 return false;
368         }
369         catch(exception& e) {
370                 m->errorOut(e, "GroupMap", "isValidGroup");
371                 exit(1);
372         }
373 }
374 /************************************************************/
375 int GroupMap::getCopy(GroupMap* g) {
376         try {
377         vector<string> names = g->getNamesSeqs();
378         for (int i = 0; i < names.size(); i++) {
379             if (m->control_pressed) { break; }
380             string group = g->getGroup(names[i]);
381             setGroup(names[i], group);
382         }
383         return names.size();
384         }
385         catch(exception& e) {
386                 m->errorOut(e, "GroupMap", "getCopy");
387                 exit(1);
388         }
389 }
390 /************************************************************/
391 int GroupMap::getNumSeqs(string group) {
392         try {
393                 
394                 map<string, int>::iterator itNum;
395                 
396                 itNum = seqsPerGroup.find(group);
397                 
398                 if (itNum == seqsPerGroup.end()) { return 0; }
399                 
400                 return seqsPerGroup[group];
401                 
402         }
403         catch(exception& e) {
404                 m->errorOut(e, "GroupMap", "getNumSeqs");
405                 exit(1);
406         }
407 }
408 /************************************************************/
409 int GroupMap::renameSeq(string oldName, string newName) {
410         try {
411                 
412                 map<string, string>::iterator itName;
413                 
414                 itName = groupmap.find(oldName);
415                 
416                 if (itName == groupmap.end()) {
417             m->mothurOut("[ERROR]: cannot find " + toString(oldName) + " in group file");
418             m->control_pressed = true;
419             return 0;
420         }else {
421             string group = itName->second;
422             groupmap.erase(itName);
423             groupmap[newName] = group;
424         }
425         
426         return 0;
427                 
428         }
429         catch(exception& e) {
430                 m->errorOut(e, "GroupMap", "renameSeq");
431                 exit(1);
432         }
433 }
434 /************************************************************/
435 int GroupMap::print(ofstream& out) {
436         try {
437                 
438                 for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) {
439             out << itName->first << '\t' << itName->second << endl;
440         }
441              
442         return 0;
443                 
444         }
445         catch(exception& e) {
446                 m->errorOut(e, "GroupMap", "print");
447                 exit(1);
448         }
449 }
450 /************************************************************/
451 int GroupMap::print(ofstream& out, vector<string> userGroups) {
452         try {
453                 
454                 for (map<string, string>::iterator itName = groupmap.begin(); itName != groupmap.end(); itName++) {
455             if (m->inUsersGroups(itName->second, userGroups)) {
456                 out << itName->first << '\t' << itName->second << endl;
457             }
458         }
459         
460         return 0;
461                 
462         }
463         catch(exception& e) {
464                 m->errorOut(e, "GroupMap", "print");
465                 exit(1);
466         }
467 }
468 /************************************************************/
469 vector<string> GroupMap::getNamesSeqs(){
470         try {
471         
472                 vector<string> names;
473                 
474                 for (it = groupmap.begin(); it != groupmap.end(); it++) {
475                         names.push_back(it->first);
476                 }
477                 
478                 return names;
479         }
480         catch(exception& e) {
481                 m->errorOut(e, "GroupMap", "getNamesSeqs");
482                 exit(1);
483         }
484 }
485 /************************************************************/
486 vector<string> GroupMap::getNamesSeqs(vector<string> picked){
487         try {
488                 
489                 vector<string> names;
490                 
491                 for (it = groupmap.begin(); it != groupmap.end(); it++) {
492                         //if you are belong to one the the groups in the picked vector add you
493                         if (m->inUsersGroups(it->second, picked)) {
494                                 names.push_back(it->first);
495                         }
496                 }
497                 
498                 return names;
499         }
500         catch(exception& e) {
501                 m->errorOut(e, "GroupMap", "getNamesSeqs");
502                 exit(1);
503         }
504 }
505
506 /************************************************************/
507