]> git.donarmstrong.com Git - mothur.git/blob - getgroupscommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / getgroupscommand.cpp
1 /*
2  *  getgroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getgroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> GetGroupsCommand::setParameters(){       
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
20                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
21                 CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);
23         CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
24                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
25                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
26                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
27                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
28                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
30                 
31                 vector<string> myArray;
32                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
33                 return myArray;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "GetGroupsCommand", "setParameters");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 string GetGroupsCommand::getHelpString(){       
42         try {
43                 string helpString = "";
44                 helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or shared file.\n";
45                 helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n";
46                 helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group parameter is required, unless you have a current group file, or are using a shared file.\n";
47                 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
48                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
49                 helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
50                 helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
51                 helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
52                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
53                 return helpString;
54         }
55         catch(exception& e) {
56                 m->errorOut(e, "GetGroupsCommand", "getHelpString");
57                 exit(1);
58         }
59 }
60 //**********************************************************************************************************************
61 string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){        
62         try {
63         string outputFileName = "";
64                 map<string, vector<string> >::iterator it;
65         
66         //is this a type this command creates
67         it = outputTypes.find(type);
68         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
69         else {
70             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
71             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
76             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
77             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
78         }
79         return outputFileName;
80         }
81         catch(exception& e) {
82                 m->errorOut(e, "GetGroupsCommand", "getOutputFileNameTag");
83                 exit(1);
84         }
85 }
86
87 //**********************************************************************************************************************
88 GetGroupsCommand::GetGroupsCommand(){   
89         try {
90                 abort = true; calledHelp = true;
91                 setParameters();
92                 vector<string> tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94                 outputTypes["taxonomy"] = tempOutNames;
95                 outputTypes["name"] = tempOutNames;
96                 outputTypes["group"] = tempOutNames;
97                 outputTypes["list"] = tempOutNames;
98                 outputTypes["shared"] = tempOutNames;
99         outputTypes["design"] = tempOutNames;
100         }
101         catch(exception& e) {
102                 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
103                 exit(1);
104         }
105 }
106 //**********************************************************************************************************************
107 GetGroupsCommand::GetGroupsCommand(string option)  {
108         try {
109                 abort = false; calledHelp = false;   
110                 
111                 //allow user to run help
112                 if(option == "help") { help(); abort = true; calledHelp = true; }
113                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114                 
115                 else {
116                         vector<string> myArray = setParameters();
117                         
118                         OptionParser parser(option);
119                         map<string,string> parameters = parser.getParameters();
120                         
121                         ValidParameters validParameter;
122                         map<string,string>::iterator it;
123                         
124                         //check to make sure all parameters are valid for command
125                         for (it = parameters.begin(); it != parameters.end(); it++) { 
126                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
127                         }
128                         
129                         //initialize outputTypes
130                         vector<string> tempOutNames;
131                         outputTypes["fasta"] = tempOutNames;
132                         outputTypes["taxonomy"] = tempOutNames;
133                         outputTypes["name"] = tempOutNames;
134                         outputTypes["group"] = tempOutNames;
135                         outputTypes["list"] = tempOutNames;
136                         outputTypes["shared"] = tempOutNames;
137             outputTypes["design"] = tempOutNames;
138                         
139                         
140                         //if the user changes the output directory command factory will send this info to us in the output parameter 
141                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
142                         
143                         //if the user changes the input directory command factory will send this info to us in the output parameter 
144                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
145                         if (inputDir == "not found"){   inputDir = "";          }
146                         else {
147                                 string path;
148                                 it = parameters.find("fasta");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
154                                 }
155                                 
156                                 it = parameters.find("accnos");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
162                                 }
163                                 
164                                 it = parameters.find("list");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
170                                 }
171                                 
172                                 it = parameters.find("name");
173                                 //user has given a template file
174                                 if(it != parameters.end()){ 
175                                         path = m->hasPath(it->second);
176                                         //if the user has not given a path then, add inputdir. else leave path alone.
177                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
178                                 }
179                                 
180                                 it = parameters.find("group");
181                                 //user has given a template file
182                                 if(it != parameters.end()){ 
183                                         path = m->hasPath(it->second);
184                                         //if the user has not given a path then, add inputdir. else leave path alone.
185                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
186                                 }
187                                 
188                                 it = parameters.find("taxonomy");
189                                 //user has given a template file
190                                 if(it != parameters.end()){ 
191                                         path = m->hasPath(it->second);
192                                         //if the user has not given a path then, add inputdir. else leave path alone.
193                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
194                                 }
195                                 
196                                 it = parameters.find("shared");
197                                 //user has given a template file
198                                 if(it != parameters.end()){ 
199                                         path = m->hasPath(it->second);
200                                         //if the user has not given a path then, add inputdir. else leave path alone.
201                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
202                                 }
203                 
204                 it = parameters.find("design");
205                                 //user has given a template file
206                                 if(it != parameters.end()){ 
207                                         path = m->hasPath(it->second);
208                                         //if the user has not given a path then, add inputdir. else leave path alone.
209                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
210                                 }
211                         }
212                         
213                         
214                         //check for required parameters
215                         accnosfile = validParameter.validFile(parameters, "accnos", true);
216                         if (accnosfile == "not open") { abort = true; }
217                         else if (accnosfile == "not found") {  accnosfile = ""; }
218                         else { m->setAccnosFile(accnosfile); }
219                         
220                         fastafile = validParameter.validFile(parameters, "fasta", true);
221                         if (fastafile == "not open") { fastafile = ""; abort = true; }
222                         else if (fastafile == "not found") {  fastafile = "";  }
223                         else { m->setFastaFile(fastafile); }
224                         
225                         namefile = validParameter.validFile(parameters, "name", true);
226                         if (namefile == "not open") { namefile = ""; abort = true; }
227                         else if (namefile == "not found") {  namefile = "";  }  
228                         else { m->setNameFile(namefile); }
229                         
230                         groupfile = validParameter.validFile(parameters, "group", true);
231                         if (groupfile == "not open") { groupfile = ""; abort = true; }
232                         else if (groupfile == "not found") {  groupfile = "";                   }
233                         else { m->setGroupFile(groupfile); }    
234                         
235                         listfile = validParameter.validFile(parameters, "list", true);
236                         if (listfile == "not open") { abort = true; }
237                         else if (listfile == "not found") {  listfile = "";  }
238                         else { m->setListFile(listfile); }
239                         
240                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
241                         if (taxfile == "not open") { taxfile = ""; abort = true; }
242                         else if (taxfile == "not found") {  taxfile = "";  }
243                         else { m->setTaxonomyFile(taxfile); }
244                         
245                         groups = validParameter.validFile(parameters, "groups", false);                 
246                         if (groups == "not found") { groups = ""; }
247                         else {
248                                 m->splitAtDash(groups, Groups);
249                                 m->setGroups(Groups);
250                         }
251                         
252                         sharedfile = validParameter.validFile(parameters, "shared", true);
253                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
254                         else if (sharedfile == "not found") {  sharedfile = "";  }
255                         else { m->setSharedFile(sharedfile); }
256                         
257                         groupfile = validParameter.validFile(parameters, "group", true);
258                         if (groupfile == "not open") { groupfile = ""; abort = true; }
259                         else if (groupfile == "not found") {    groupfile = ""; }
260                         else { m->setGroupFile(groupfile); }
261             
262             designfile = validParameter.validFile(parameters, "design", true);
263                         if (designfile == "not open") { designfile = ""; abort = true; }
264                         else if (designfile == "not found") {   designfile = "";        }
265                         else { m->setDesignFile(designfile); }
266                         
267                         if ((sharedfile == "") && (groupfile == "") && (designfile == "")) { 
268                                 //is there are current file available for any of these?
269                                 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
270                                         //give priority to group, then shared
271                                         groupfile = m->getGroupFile(); 
272                                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
273                                         else { 
274                                                 sharedfile = m->getSharedFile(); 
275                                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
276                                                 else { 
277                                                         m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
278                                                 }
279                                         }
280                                 }else {
281                                         //give priority to shared, then group
282                                         sharedfile = m->getSharedFile(); 
283                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
284                                         else { 
285                                                 groupfile = m->getGroupFile(); 
286                                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
287                                                 else { 
288                                                         designfile = m->getDesignFile(); 
289                             if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
290                             else { 
291                                 m->mothurOut("You have no current groupfile or sharedfile or designfile and one is required."); m->mothurOutEndLine(); abort = true;
292                             }
293                                                 }
294                                         }
295                                 }
296                         }
297                         
298                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
299                         
300                         if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design or list."); m->mothurOutEndLine(); abort = true; }
301                         if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
302
303                         if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
304                                 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
305                                 parser.getNameFile(files);
306                         }
307                 }
308                 
309         }
310         catch(exception& e) {
311                 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
312                 exit(1);
313         }
314 }
315 //**********************************************************************************************************************
316
317 int GetGroupsCommand::execute(){
318         try {
319                 
320                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
321                 
322                 //get groups you want to remove
323                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
324                 
325                 if (groupfile != "") {
326                         groupMap = new GroupMap(groupfile);
327                         groupMap->readMap();
328                         
329                         //make sure groups are valid
330                         //takes care of user setting groupNames that are invalid or setting groups=all
331                         SharedUtil* util = new SharedUtil();
332                         vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
333                         util->setGroups(Groups, gNamesOfGroups);
334                         groupMap->setNamesOfGroups(gNamesOfGroups);
335                         delete util;
336                         
337                         //fill names with names of sequences that are from the groups we want to remove 
338                         fillNames();
339                         
340                         delete groupMap;
341                 }
342                 
343                 if (m->control_pressed) { return 0; }
344                 
345                 //read through the correct file and output lines you want to keep
346                 if (namefile != "")                     {               readName();             }
347                 if (fastafile != "")            {               readFasta();    }
348                 if (groupfile != "")            {               readGroup();    }
349                 if (listfile != "")                     {               readList();             }
350                 if (taxfile != "")                      {               readTax();              }
351                 if (sharedfile != "")           {               readShared();   }
352         if (designfile != "")           {               readDesign();   }
353                 
354                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
355                 
356                 
357                 if (outputNames.size() != 0) {
358                         m->mothurOutEndLine();
359                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
360                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
361                         m->mothurOutEndLine();
362                         
363                         //set fasta file as new current fastafile
364                         string current = "";
365                         itTypes = outputTypes.find("fasta");
366                         if (itTypes != outputTypes.end()) {
367                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
368                         }
369                         
370                         itTypes = outputTypes.find("name");
371                         if (itTypes != outputTypes.end()) {
372                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
373                         }
374                         
375                         itTypes = outputTypes.find("group");
376                         if (itTypes != outputTypes.end()) {
377                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
378                         }
379                         
380                         itTypes = outputTypes.find("list");
381                         if (itTypes != outputTypes.end()) {
382                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
383                         }
384                         
385                         itTypes = outputTypes.find("taxonomy");
386                         if (itTypes != outputTypes.end()) {
387                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
388                         }
389                         
390                         itTypes = outputTypes.find("shared");
391                         if (itTypes != outputTypes.end()) {
392                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
393                         }
394             
395             itTypes = outputTypes.find("design");
396                         if (itTypes != outputTypes.end()) {
397                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
398                         }
399                 }
400                 
401                 return 0;               
402         }
403         
404         catch(exception& e) {
405                 m->errorOut(e, "GetGroupsCommand", "execute");
406                 exit(1);
407         }
408 }
409
410 //**********************************************************************************************************************
411 int GetGroupsCommand::readFasta(){
412         try {
413                 string thisOutputDir = outputDir;
414                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
415                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
416                 
417                 ofstream out;
418                 m->openOutputFile(outputFileName, out);
419                 
420                 ifstream in;
421                 m->openInputFile(fastafile, in);
422                 string name;
423                 
424                 bool wroteSomething = false;
425                 int selectedCount = 0;
426                 
427                 while(!in.eof()){
428                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
429                         
430                         Sequence currSeq(in);
431                         name = currSeq.getName();
432                         
433                         if (name != "") {
434                                 //if this name is in the accnos file
435                                 if (names.count(name) != 0) {
436                                         wroteSomething = true;
437                                         
438                                         currSeq.printSequence(out);
439                                         selectedCount++;
440                                 }else{
441                                         //if you are not in the accnos file check if you are a name that needs to be changed
442                                         map<string, string>::iterator it = uniqueToRedundant.find(name);
443                                         if (it != uniqueToRedundant.end()) {
444                                                 wroteSomething = true;
445                                                 currSeq.setName(it->second);
446                                                 currSeq.printSequence(out);
447                                                 selectedCount++;
448                                         }
449                                 }
450                         }
451                         m->gobble(in);
452                 }
453                 in.close();     
454                 out.close();
455                 
456                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
457                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
458                 
459                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
460
461                 
462                 return 0;
463                 
464         }
465         catch(exception& e) {
466                 m->errorOut(e, "GetGroupsCommand", "readFasta");
467                 exit(1);
468         }
469 }
470 //**********************************************************************************************************************
471 int GetGroupsCommand::readShared(){
472         try {
473                 string thisOutputDir = outputDir;
474                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
475                 
476                 InputData input(sharedfile, "sharedfile");
477                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
478                 
479                 bool wroteSomething = false;
480                 
481                 while(lookup[0] != NULL) {
482                         
483                         string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
484                         ofstream out;
485                         m->openOutputFile(outputFileName, out);
486                         outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
487                         
488                         if (m->control_pressed) { out.close();  m->mothurRemove(outputFileName);  for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
489                         
490                         lookup[0]->printHeaders(out); 
491                         
492                         for (int i = 0; i < lookup.size(); i++) {
493                                 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
494                                 lookup[i]->print(out);
495                                 wroteSomething = true;
496                                 
497                         }                       
498                         
499                         //get next line to process
500                         //prevent memory leak
501                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
502                         lookup = input.getSharedRAbundVectors();
503                         
504                         out.close();
505                 }
506                 
507                 if (wroteSomething == false) {  m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine();  }
508                 
509                 string groupsString = "";
510                 for (int i = 0; i < Groups.size()-1; i++) {     groupsString += Groups[i] + ", "; }
511                 groupsString += Groups[Groups.size()-1];
512                 
513                 m->mothurOut("Selected groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
514                 
515                 return 0;
516                 
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "GetGroupsCommand", "readShared");
520                 exit(1);
521         }
522 }
523 //**********************************************************************************************************************
524 int GetGroupsCommand::readList(){
525         try {
526                 string thisOutputDir = outputDir;
527                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
528                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
529                 
530                 ofstream out;
531                 m->openOutputFile(outputFileName, out);
532                 
533                 ifstream in;
534                 m->openInputFile(listfile, in);
535                 
536                 bool wroteSomething = false;
537                 int selectedCount = 0;
538                 
539                 while(!in.eof()){
540                         
541                         selectedCount = 0;
542                         
543                         //read in list vector
544                         ListVector list(in);
545                         
546                         //make a new list vector
547                         ListVector newList;
548                         newList.setLabel(list.getLabel());
549                         
550                         //for each bin
551                         for (int i = 0; i < list.getNumBins(); i++) {
552                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
553                                 
554                                 //parse out names that are in accnos file
555                                 string binnames = list.get(i);
556                                 
557                                 string newNames = "";
558                                 while (binnames.find_first_of(',') != -1) { 
559                                         string name = binnames.substr(0,binnames.find_first_of(','));
560                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
561                                         
562                                         //if that name is in the .accnos file, add it
563                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++;  }
564                                         else{
565                                                 //if you are not in the accnos file check if you are a name that needs to be changed
566                                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
567                                                 if (it != uniqueToRedundant.end()) {
568                                                         newNames += it->second + ",";
569                                                         selectedCount++;
570                                                 }
571                                         }
572                                 }
573                                 
574                                 //get last name
575                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  }
576                                 else{
577                                         //if you are not in the accnos file check if you are a name that needs to be changed
578                                         map<string, string>::iterator it = uniqueToRedundant.find(binnames);
579                                         if (it != uniqueToRedundant.end()) {
580                                                 newNames += it->second + ",";
581                                                 selectedCount++;
582                                         }
583                                 }
584                                 
585                                 //if there are names in this bin add to new list
586                                 if (newNames != "") {  
587                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
588                                         newList.push_back(newNames);    
589                                 }
590                         }
591                         
592                         //print new listvector
593                         if (newList.getNumBins() != 0) {
594                                 wroteSomething = true;
595                                 newList.print(out);
596                         }
597                         
598                         m->gobble(in);
599                 }
600                 in.close();     
601                 out.close();
602                 
603                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
604                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
605                 
606                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
607                 
608                 return 0;
609                 
610         }
611         catch(exception& e) {
612                 m->errorOut(e, "GetGroupsCommand", "readList");
613                 exit(1);
614         }
615 }
616 //**********************************************************************************************************************
617 int GetGroupsCommand::readName(){
618         try {
619                 string thisOutputDir = outputDir;
620                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
621                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
622                 
623                 ofstream out;
624                 m->openOutputFile(outputFileName, out);
625                 
626                 ifstream in;
627                 m->openInputFile(namefile, in);
628                 string name, firstCol, secondCol;
629                 
630                 bool wroteSomething = false;
631                 int selectedCount = 0;
632                 
633                 while(!in.eof()){
634                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
635                         
636                         in >> firstCol;         m->gobble(in);          
637                         in >> secondCol;                        
638                         
639                         vector<string> parsedNames;
640                         m->splitAtComma(secondCol, parsedNames);
641                         
642                         vector<string> validSecond;  validSecond.clear();
643                         for (int i = 0; i < parsedNames.size(); i++) {
644                                 if (names.count(parsedNames[i]) != 0) {
645                                         validSecond.push_back(parsedNames[i]);
646                                 }
647                         }
648                         
649                         selectedCount += validSecond.size();
650                         
651                         //if the name in the first column is in the set then print it and any other names in second column also in set
652                         if (names.count(firstCol) != 0) {
653                                 
654                                 wroteSomething = true;
655                                 
656                                 out << firstCol << '\t';
657                                 
658                                 //you know you have at least one valid second since first column is valid
659                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
660                                 out << validSecond[validSecond.size()-1] << endl;
661                                 
662                                 //make first name in set you come to first column and then add the remaining names to second column
663                         }else {
664                                 
665                                 //you want part of this row
666                                 if (validSecond.size() != 0) {
667                                         
668                                         wroteSomething = true;
669                                         
670                                         out << validSecond[0] << '\t';
671                                         
672                                         //you know you have at least one valid second since first column is valid
673                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
674                                         out << validSecond[validSecond.size()-1] << endl;
675                                         uniqueToRedundant[firstCol] = validSecond[0];
676                                 }
677                         }
678                         
679                         m->gobble(in);
680                 }
681                 in.close();
682                 out.close();
683                 
684                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
685                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
686                 
687                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
688
689                 return 0;
690         }
691         catch(exception& e) {
692                 m->errorOut(e, "GetGroupsCommand", "readName");
693                 exit(1);
694         }
695 }
696
697 //**********************************************************************************************************************
698 int GetGroupsCommand::readGroup(){
699         try {
700                 string thisOutputDir = outputDir;
701                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
702                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
703                 
704                 ofstream out;
705                 m->openOutputFile(outputFileName, out);
706                 
707                 ifstream in;
708                 m->openInputFile(groupfile, in);
709                 string name, group;
710                 
711                 bool wroteSomething = false;
712                 int selectedCount = 0;
713                 
714                 while(!in.eof()){
715                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
716                         
717                         in >> name;                             //read from first column
718                         in >> group;                    //read from second column
719                         
720                         //if this name is in the accnos file
721                         if (names.count(name) != 0) {
722                                 wroteSomething = true;
723                                 out << name << '\t' << group << endl;
724                                 selectedCount++;
725                         }
726                         
727                         m->gobble(in);
728                 }
729                 in.close();
730                 out.close();
731                 
732                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
733                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
734                 
735                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
736
737                 return 0;
738         }
739         catch(exception& e) {
740                 m->errorOut(e, "GetGroupsCommand", "readGroup");
741                 exit(1);
742         }
743 }
744 //**********************************************************************************************************************
745 int GetGroupsCommand::readDesign(){
746         try {
747                 string thisOutputDir = outputDir;
748                 if (outputDir == "") {  thisOutputDir += m->hasPath(designfile);  }
749                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
750                 
751                 ofstream out;
752                 m->openOutputFile(outputFileName, out);
753                 
754                 ifstream in;
755                 m->openInputFile(designfile, in);
756                 string name, group;
757                 
758                 bool wroteSomething = false;
759                 int selectedCount = 0;
760                 
761                 while(!in.eof()){
762                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
763                         
764                         in >> name;                             //read from first column
765                         in >> group;                    //read from second column
766                         
767                         //if this name is in the accnos file
768                         if (m->inUsersGroups(name, Groups)) {
769                                 wroteSomething = true;
770                                 out << name << '\t' << group << endl;
771                 selectedCount++;
772                         }
773                         
774                         m->gobble(in);
775                 }
776                 in.close();
777                 out.close();
778                 
779                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain groups from the groups you wish to get."); m->mothurOutEndLine();  }
780                 outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName);
781                 
782                 m->mothurOut("Selected " + toString(selectedCount) + " groups from your design file."); m->mothurOutEndLine();
783         
784                 
785                 return 0;
786         }
787         catch(exception& e) {
788                 m->errorOut(e, "GetGroupsCommand", "readDesign");
789                 exit(1);
790         }
791 }
792
793
794 //**********************************************************************************************************************
795 int GetGroupsCommand::readTax(){
796         try {
797                 string thisOutputDir = outputDir;
798                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
799                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
800                 ofstream out;
801                 m->openOutputFile(outputFileName, out);
802                 
803                 ifstream in;
804                 m->openInputFile(taxfile, in);
805                 string name, tax;
806                 
807                 bool wroteSomething = false;
808                 
809                 while(!in.eof()){
810                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
811                         
812                         in >> name;                             //read from first column
813                         in >> tax;                      //read from second column
814                         
815                         //if this name is in the accnos file
816                         if (names.count(name) != 0) {
817                                 wroteSomething = true;
818                                 out << name << '\t' << tax << endl;
819                         }else{
820                                 //if you are not in the accnos file check if you are a name that needs to be changed
821                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
822                                 if (it != uniqueToRedundant.end()) {
823                                         wroteSomething = true;
824                                         out << it->second << '\t' << tax << endl;
825                                 }
826                         }
827                         
828                         m->gobble(in);
829                 }
830                 in.close();
831                 out.close();
832                 
833                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
834                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
835                 
836                 return 0;
837         }
838         catch(exception& e) {
839                 m->errorOut(e, "GetGroupsCommand", "readTax");
840                 exit(1);
841         }
842 }
843 //**********************************************************************************************************************
844 int GetGroupsCommand::fillNames(){
845         try {
846                 vector<string> seqs = groupMap->getNamesSeqs();
847                 
848                 for (int i = 0; i < seqs.size(); i++) {
849                         
850                         if (m->control_pressed) { return 0; }
851                         
852                         string group = groupMap->getGroup(seqs[i]);
853                         
854                         if (m->inUsersGroups(group, Groups)) {
855                                 names.insert(seqs[i]);
856                         }
857                 }
858                 
859                 return 0;
860         }
861         catch(exception& e) {
862                 m->errorOut(e, "GetGroupsCommand", "fillNames");
863                 exit(1);
864         }
865 }
866
867 //**********************************************************************************************************************
868
869