]> git.donarmstrong.com Git - mothur.git/blob - getgroupscommand.cpp
910a8720bc3ec88079d2663ebf819408436ca02a
[mothur.git] / getgroupscommand.cpp
1 /*
2  *  getgroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "getgroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> GetGroupsCommand::setParameters(){       
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
20                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
21         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup);         
24         CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
25                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
26                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
28                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "GetGroupsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string GetGroupsCommand::getHelpString(){       
43         try {
44                 string helpString = "";
45                 helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy, design or shared file.\n";
46                 helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n";
47                 helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file, or are using a shared file.\n";
48                 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
49                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like.  You can separate group names with dashes.\n";
50                 helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
51                 helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
52                 helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "GetGroupsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string GetGroupsCommand::getOutputFileNameTag(string type, string inputName=""){        
63         try {
64         string outputFileName = "";
65                 map<string, vector<string> >::iterator it;
66         
67         //is this a type this command creates
68         it = outputTypes.find(type);
69         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
70         else {
71             if (type == "fasta")            {   outputFileName =  "pick" + m->getExtension(inputName);   }
72             else if (type == "taxonomy")    {   outputFileName =  "pick" + m->getExtension(inputName);   }
73             else if (type == "name")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
74             else if (type == "group")       {   outputFileName =  "pick" + m->getExtension(inputName);   }
75             else if (type == "count")       {   outputFileName =  "pick.count.table";   }
76             else if (type == "list")        {   outputFileName =  "pick" + m->getExtension(inputName);   }
77             else if (type == "shared")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
78             else if (type == "design")      {   outputFileName =  "pick" + m->getExtension(inputName);   }
79             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
80         }
81         return outputFileName;
82         }
83         catch(exception& e) {
84                 m->errorOut(e, "GetGroupsCommand", "getOutputFileNameTag");
85                 exit(1);
86         }
87 }
88
89 //**********************************************************************************************************************
90 GetGroupsCommand::GetGroupsCommand(){   
91         try {
92                 abort = true; calledHelp = true;
93                 setParameters();
94                 vector<string> tempOutNames;
95                 outputTypes["fasta"] = tempOutNames;
96                 outputTypes["taxonomy"] = tempOutNames;
97                 outputTypes["name"] = tempOutNames;
98                 outputTypes["group"] = tempOutNames;
99                 outputTypes["list"] = tempOutNames;
100                 outputTypes["shared"] = tempOutNames;
101         outputTypes["design"] = tempOutNames;
102         outputTypes["count"] = tempOutNames;
103         }
104         catch(exception& e) {
105                 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
106                 exit(1);
107         }
108 }
109 //**********************************************************************************************************************
110 GetGroupsCommand::GetGroupsCommand(string option)  {
111         try {
112                 abort = false; calledHelp = false;   
113                 
114                 //allow user to run help
115                 if(option == "help") { help(); abort = true; calledHelp = true; }
116                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
117                 
118                 else {
119                         vector<string> myArray = setParameters();
120                         
121                         OptionParser parser(option);
122                         map<string,string> parameters = parser.getParameters();
123                         
124                         ValidParameters validParameter;
125                         map<string,string>::iterator it;
126                         
127                         //check to make sure all parameters are valid for command
128                         for (it = parameters.begin(); it != parameters.end(); it++) { 
129                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
130                         }
131                         
132                         //initialize outputTypes
133                         vector<string> tempOutNames;
134                         outputTypes["fasta"] = tempOutNames;
135                         outputTypes["taxonomy"] = tempOutNames;
136                         outputTypes["name"] = tempOutNames;
137                         outputTypes["group"] = tempOutNames;
138                         outputTypes["list"] = tempOutNames;
139                         outputTypes["shared"] = tempOutNames;
140             outputTypes["design"] = tempOutNames;
141             outputTypes["count"] = tempOutNames;
142                         
143                         
144                         //if the user changes the output directory command factory will send this info to us in the output parameter 
145                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
146                         
147                         //if the user changes the input directory command factory will send this info to us in the output parameter 
148                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
149                         if (inputDir == "not found"){   inputDir = "";          }
150                         else {
151                                 string path;
152                                 it = parameters.find("fasta");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("accnos");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
166                                 }
167                                 
168                                 it = parameters.find("list");
169                                 //user has given a template file
170                                 if(it != parameters.end()){ 
171                                         path = m->hasPath(it->second);
172                                         //if the user has not given a path then, add inputdir. else leave path alone.
173                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
174                                 }
175                                 
176                                 it = parameters.find("name");
177                                 //user has given a template file
178                                 if(it != parameters.end()){ 
179                                         path = m->hasPath(it->second);
180                                         //if the user has not given a path then, add inputdir. else leave path alone.
181                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
182                                 }
183                                 
184                                 it = parameters.find("group");
185                                 //user has given a template file
186                                 if(it != parameters.end()){ 
187                                         path = m->hasPath(it->second);
188                                         //if the user has not given a path then, add inputdir. else leave path alone.
189                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
190                                 }
191                                 
192                                 it = parameters.find("taxonomy");
193                                 //user has given a template file
194                                 if(it != parameters.end()){ 
195                                         path = m->hasPath(it->second);
196                                         //if the user has not given a path then, add inputdir. else leave path alone.
197                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
198                                 }
199                                 
200                                 it = parameters.find("shared");
201                                 //user has given a template file
202                                 if(it != parameters.end()){ 
203                                         path = m->hasPath(it->second);
204                                         //if the user has not given a path then, add inputdir. else leave path alone.
205                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
206                                 }
207                 
208                 it = parameters.find("design");
209                                 //user has given a template file
210                                 if(it != parameters.end()){ 
211                                         path = m->hasPath(it->second);
212                                         //if the user has not given a path then, add inputdir. else leave path alone.
213                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
214                                 }
215                 
216                 it = parameters.find("count");
217                                 //user has given a template file
218                                 if(it != parameters.end()){ 
219                                         path = m->hasPath(it->second);
220                                         //if the user has not given a path then, add inputdir. else leave path alone.
221                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
222                                 }
223                         }
224                         
225                         
226                         //check for required parameters
227                         accnosfile = validParameter.validFile(parameters, "accnos", true);
228                         if (accnosfile == "not open") { abort = true; }
229                         else if (accnosfile == "not found") {  accnosfile = ""; }
230                         else { m->setAccnosFile(accnosfile); }
231                         
232                         fastafile = validParameter.validFile(parameters, "fasta", true);
233                         if (fastafile == "not open") { fastafile = ""; abort = true; }
234                         else if (fastafile == "not found") {  fastafile = "";  }
235                         else { m->setFastaFile(fastafile); }
236                         
237                         namefile = validParameter.validFile(parameters, "name", true);
238                         if (namefile == "not open") { namefile = ""; abort = true; }
239                         else if (namefile == "not found") {  namefile = "";  }  
240                         else { m->setNameFile(namefile); }
241                         
242                         listfile = validParameter.validFile(parameters, "list", true);
243                         if (listfile == "not open") { abort = true; }
244                         else if (listfile == "not found") {  listfile = "";  }
245                         else { m->setListFile(listfile); }
246                         
247                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
248                         if (taxfile == "not open") { taxfile = ""; abort = true; }
249                         else if (taxfile == "not found") {  taxfile = "";  }
250                         else { m->setTaxonomyFile(taxfile); }
251                         
252                         groups = validParameter.validFile(parameters, "groups", false);                 
253                         if (groups == "not found") { groups = ""; }
254                         else {
255                                 m->splitAtDash(groups, Groups);
256                                 m->setGroups(Groups);
257                         }
258                         
259                         sharedfile = validParameter.validFile(parameters, "shared", true);
260                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
261                         else if (sharedfile == "not found") {  sharedfile = "";  }
262                         else { m->setSharedFile(sharedfile); }
263                         
264                         groupfile = validParameter.validFile(parameters, "group", true);
265                         if (groupfile == "not open") { groupfile = ""; abort = true; }
266                         else if (groupfile == "not found") {    groupfile = ""; }
267                         else { m->setGroupFile(groupfile); }
268             
269             designfile = validParameter.validFile(parameters, "design", true);
270                         if (designfile == "not open") { designfile = ""; abort = true; }
271                         else if (designfile == "not found") {   designfile = "";        }
272                         else { m->setDesignFile(designfile); }
273             
274             countfile = validParameter.validFile(parameters, "count", true);
275             if (countfile == "not open") { countfile = ""; abort = true; }
276             else if (countfile == "not found") { countfile = "";  }     
277             else { m->setCountTableFile(countfile); }
278             
279             if ((namefile != "") && (countfile != "")) {
280                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
281             }
282             
283             if ((groupfile != "") && (countfile != "")) {
284                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
285             }
286
287                         
288                         if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { 
289                                 //is there are current file available for any of these?
290                                 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
291                                         //give priority to group, then shared
292                                         groupfile = m->getGroupFile(); 
293                                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
294                                         else { 
295                                                 sharedfile = m->getSharedFile(); 
296                                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
297                                                 else { 
298                                                         countfile = m->getCountTableFile(); 
299                             if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300                             else { 
301                                 m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
302                             }
303                                                 }
304                                         }
305                                 }else {
306                                         //give priority to shared, then group
307                                         sharedfile = m->getSharedFile(); 
308                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
309                                         else { 
310                                                 groupfile = m->getGroupFile(); 
311                                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
312                                                 else { 
313                                                         designfile = m->getDesignFile(); 
314                             if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
315                             else { 
316                                 countfile = m->getCountTableFile(); 
317                                 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
318                                 else { 
319                                     m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
320                                 }
321
322                             }
323                                                 }
324                                         }
325                                 }
326                         }
327                         
328                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
329                         
330                         if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
331                         if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
332             
333             if (countfile == "") {
334                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
335                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
336                     parser.getNameFile(files);
337                 }
338             }
339                 }
340                 
341         }
342         catch(exception& e) {
343                 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
344                 exit(1);
345         }
346 }
347 //**********************************************************************************************************************
348
349 int GetGroupsCommand::execute(){
350         try {
351                 
352                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
353                 
354                 //get groups you want to remove
355                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
356                 
357                 if (groupfile != "") {
358                         groupMap = new GroupMap(groupfile);
359                         groupMap->readMap();
360                         
361                         //make sure groups are valid
362                         //takes care of user setting groupNames that are invalid or setting groups=all
363                         SharedUtil* util = new SharedUtil();
364                         vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
365                         util->setGroups(Groups, gNamesOfGroups);
366             m->setGroups(Groups);
367                         groupMap->setNamesOfGroups(gNamesOfGroups);
368                         delete util;
369                         
370                         //fill names with names of sequences that are from the groups we want to remove 
371                         fillNames();
372                         
373                         delete groupMap;
374                 }else if (countfile != ""){
375             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
376                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
377             }
378             CountTable ct;
379             ct.readTable(countfile);
380             if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
381                 
382             vector<string> gNamesOfGroups = ct.getNamesOfGroups();
383             SharedUtil util;
384             util.setGroups(Groups, gNamesOfGroups);
385             m->setGroups(Groups);
386             for (int i = 0; i < Groups.size(); i++) {
387                 vector<string> thisGroupsSeqs = ct.getNamesOfSeqs(Groups[i]);
388                 for (int j = 0; j < thisGroupsSeqs.size(); j++) { names.insert(thisGroupsSeqs[j]); }
389             }
390         }
391                 
392                 if (m->control_pressed) { return 0; }
393                 
394                 //read through the correct file and output lines you want to keep
395                 if (namefile != "")                     {               readName();             }
396                 if (fastafile != "")            {               readFasta();    }
397                 if (groupfile != "")            {               readGroup();    }
398         if (countfile != "")            {               readCount();    }
399                 if (listfile != "")                     {               readList();             }
400                 if (taxfile != "")                      {               readTax();              }
401                 if (sharedfile != "")           {               readShared();   }
402         if (designfile != "")           {               readDesign();   }
403                 
404                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
405                 
406                 
407                 if (outputNames.size() != 0) {
408                         m->mothurOutEndLine();
409                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
410                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
411                         m->mothurOutEndLine();
412                         
413                         //set fasta file as new current fastafile
414                         string current = "";
415                         itTypes = outputTypes.find("fasta");
416                         if (itTypes != outputTypes.end()) {
417                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
418                         }
419                         
420                         itTypes = outputTypes.find("name");
421                         if (itTypes != outputTypes.end()) {
422                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
423                         }
424                         
425                         itTypes = outputTypes.find("group");
426                         if (itTypes != outputTypes.end()) {
427                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
428                         }
429                         
430                         itTypes = outputTypes.find("list");
431                         if (itTypes != outputTypes.end()) {
432                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
433                         }
434                         
435                         itTypes = outputTypes.find("taxonomy");
436                         if (itTypes != outputTypes.end()) {
437                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
438                         }
439                         
440                         itTypes = outputTypes.find("shared");
441                         if (itTypes != outputTypes.end()) {
442                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
443                         }
444             
445             itTypes = outputTypes.find("design");
446                         if (itTypes != outputTypes.end()) {
447                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
448                         }
449             
450             itTypes = outputTypes.find("count");
451                         if (itTypes != outputTypes.end()) {
452                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
453                         }
454                 }
455                 
456                 return 0;               
457         }
458         
459         catch(exception& e) {
460                 m->errorOut(e, "GetGroupsCommand", "execute");
461                 exit(1);
462         }
463 }
464
465 //**********************************************************************************************************************
466 int GetGroupsCommand::readFasta(){
467         try {
468                 string thisOutputDir = outputDir;
469                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
470                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
471                 
472                 ofstream out;
473                 m->openOutputFile(outputFileName, out);
474                 
475                 ifstream in;
476                 m->openInputFile(fastafile, in);
477                 string name;
478                 
479                 bool wroteSomething = false;
480                 int selectedCount = 0;
481                 
482                 while(!in.eof()){
483                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
484                         
485                         Sequence currSeq(in);
486                         name = currSeq.getName();
487                         
488                         if (name != "") {
489                                 //if this name is in the accnos file
490                                 if (names.count(name) != 0) {
491                                         wroteSomething = true;
492                                         
493                                         currSeq.printSequence(out);
494                                         selectedCount++;
495                                 }else{
496                                         //if you are not in the accnos file check if you are a name that needs to be changed
497                                         map<string, string>::iterator it = uniqueToRedundant.find(name);
498                                         if (it != uniqueToRedundant.end()) {
499                                                 wroteSomething = true;
500                                                 currSeq.setName(it->second);
501                                                 currSeq.printSequence(out);
502                                                 selectedCount++;
503                                         }
504                                 }
505                         }
506                         m->gobble(in);
507                 }
508                 in.close();     
509                 out.close();
510                 
511                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
512                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
513                 
514                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
515
516                 
517                 return 0;
518                 
519         }
520         catch(exception& e) {
521                 m->errorOut(e, "GetGroupsCommand", "readFasta");
522                 exit(1);
523         }
524 }
525 //**********************************************************************************************************************
526 int GetGroupsCommand::readShared(){
527         try {
528                 string thisOutputDir = outputDir;
529                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
530                 
531                 InputData input(sharedfile, "sharedfile");
532                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
533                 
534                 bool wroteSomething = false;
535                 
536                 while(lookup[0] != NULL) {
537                         
538                         string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
539                         ofstream out;
540                         m->openOutputFile(outputFileName, out);
541                         outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
542                         
543                         if (m->control_pressed) { out.close();  m->mothurRemove(outputFileName);  for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
544                         
545                         lookup[0]->printHeaders(out); 
546                         
547                         for (int i = 0; i < lookup.size(); i++) {
548                                 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
549                                 lookup[i]->print(out);
550                                 wroteSomething = true;
551                                 
552                         }                       
553                         
554                         //get next line to process
555                         //prevent memory leak
556                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
557                         lookup = input.getSharedRAbundVectors();
558                         
559                         out.close();
560                 }
561                 
562                 if (wroteSomething == false) {  m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine();  }
563                 
564                 string groupsString = "";
565                 for (int i = 0; i < Groups.size()-1; i++) {     groupsString += Groups[i] + ", "; }
566                 groupsString += Groups[Groups.size()-1];
567                 
568                 m->mothurOut("Selected groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
569                 
570                 return 0;
571                 
572         }
573         catch(exception& e) {
574                 m->errorOut(e, "GetGroupsCommand", "readShared");
575                 exit(1);
576         }
577 }
578 //**********************************************************************************************************************
579 int GetGroupsCommand::readList(){
580         try {
581                 string thisOutputDir = outputDir;
582                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
583                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
584                 
585                 ofstream out;
586                 m->openOutputFile(outputFileName, out);
587                 
588                 ifstream in;
589                 m->openInputFile(listfile, in);
590                 
591                 bool wroteSomething = false;
592                 int selectedCount = 0;
593                 
594                 while(!in.eof()){
595                         
596                         selectedCount = 0;
597                         
598                         //read in list vector
599                         ListVector list(in);
600                         
601                         //make a new list vector
602                         ListVector newList;
603                         newList.setLabel(list.getLabel());
604                         
605                         //for each bin
606                         for (int i = 0; i < list.getNumBins(); i++) {
607                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
608                                 
609                                 //parse out names that are in accnos file
610                                 string binnames = list.get(i);
611                                 
612                                 string newNames = "";
613                                 while (binnames.find_first_of(',') != -1) { 
614                                         string name = binnames.substr(0,binnames.find_first_of(','));
615                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
616                                         
617                                         //if that name is in the .accnos file, add it
618                                         if (names.count(name) != 0) {  newNames += name + ",";  selectedCount++;  }
619                                         else{
620                                                 //if you are not in the accnos file check if you are a name that needs to be changed
621                                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
622                                                 if (it != uniqueToRedundant.end()) {
623                                                         newNames += it->second + ",";
624                                                         selectedCount++;
625                                                 }
626                                         }
627                                 }
628                                 
629                                 //get last name
630                                 if (names.count(binnames) != 0) {  newNames += binnames + ",";  selectedCount++;  }
631                                 else{
632                                         //if you are not in the accnos file check if you are a name that needs to be changed
633                                         map<string, string>::iterator it = uniqueToRedundant.find(binnames);
634                                         if (it != uniqueToRedundant.end()) {
635                                                 newNames += it->second + ",";
636                                                 selectedCount++;
637                                         }
638                                 }
639                                 
640                                 //if there are names in this bin add to new list
641                                 if (newNames != "") {  
642                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
643                                         newList.push_back(newNames);    
644                                 }
645                         }
646                         
647                         //print new listvector
648                         if (newList.getNumBins() != 0) {
649                                 wroteSomething = true;
650                                 newList.print(out);
651                         }
652                         
653                         m->gobble(in);
654                 }
655                 in.close();     
656                 out.close();
657                 
658                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
659                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
660                 
661                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
662                 
663                 return 0;
664                 
665         }
666         catch(exception& e) {
667                 m->errorOut(e, "GetGroupsCommand", "readList");
668                 exit(1);
669         }
670 }
671 //**********************************************************************************************************************
672 int GetGroupsCommand::readName(){
673         try {
674                 string thisOutputDir = outputDir;
675                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
676                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
677                 
678                 ofstream out;
679                 m->openOutputFile(outputFileName, out);
680                 
681                 ifstream in;
682                 m->openInputFile(namefile, in);
683                 string name, firstCol, secondCol;
684                 
685                 bool wroteSomething = false;
686                 int selectedCount = 0;
687                 
688                 while(!in.eof()){
689                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
690                         
691                         in >> firstCol;         m->gobble(in);          
692                         in >> secondCol;                        
693                         
694                         vector<string> parsedNames;
695                         m->splitAtComma(secondCol, parsedNames);
696                         
697                         vector<string> validSecond;  validSecond.clear();
698                         for (int i = 0; i < parsedNames.size(); i++) {
699                                 if (names.count(parsedNames[i]) != 0) {
700                                         validSecond.push_back(parsedNames[i]);
701                                 }
702                         }
703                         
704                         selectedCount += validSecond.size();
705                         
706                         //if the name in the first column is in the set then print it and any other names in second column also in set
707                         if (names.count(firstCol) != 0) {
708                                 
709                                 wroteSomething = true;
710                                 
711                                 out << firstCol << '\t';
712                                 
713                                 //you know you have at least one valid second since first column is valid
714                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
715                                 out << validSecond[validSecond.size()-1] << endl;
716                                 
717                                 //make first name in set you come to first column and then add the remaining names to second column
718                         }else {
719                                 
720                                 //you want part of this row
721                                 if (validSecond.size() != 0) {
722                                         
723                                         wroteSomething = true;
724                                         
725                                         out << validSecond[0] << '\t';
726                                         
727                                         //you know you have at least one valid second since first column is valid
728                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
729                                         out << validSecond[validSecond.size()-1] << endl;
730                                         uniqueToRedundant[firstCol] = validSecond[0];
731                                 }
732                         }
733                         
734                         m->gobble(in);
735                 }
736                 in.close();
737                 out.close();
738                 
739                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
740                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
741                 
742                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
743
744                 return 0;
745         }
746         catch(exception& e) {
747                 m->errorOut(e, "GetGroupsCommand", "readName");
748                 exit(1);
749         }
750 }
751
752 //**********************************************************************************************************************
753 int GetGroupsCommand::readGroup(){
754         try {
755                 string thisOutputDir = outputDir;
756                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
757                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
758                 
759                 ofstream out;
760                 m->openOutputFile(outputFileName, out);
761                 
762                 ifstream in;
763                 m->openInputFile(groupfile, in);
764                 string name, group;
765                 
766                 bool wroteSomething = false;
767                 int selectedCount = 0;
768                 
769                 while(!in.eof()){
770                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
771                         
772                         in >> name;                             //read from first column
773                         in >> group;                    //read from second column
774                         
775                         //if this name is in the accnos file
776                         if (names.count(name) != 0) {
777                                 wroteSomething = true;
778                                 out << name << '\t' << group << endl;
779                                 selectedCount++;
780                         }
781                         
782                         m->gobble(in);
783                 }
784                 in.close();
785                 out.close();
786                 
787                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
788                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
789                 
790                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
791
792                 return 0;
793         }
794         catch(exception& e) {
795                 m->errorOut(e, "GetGroupsCommand", "readGroup");
796                 exit(1);
797         }
798 }
799 //**********************************************************************************************************************
800 int GetGroupsCommand::readCount(){
801         try {
802                 string thisOutputDir = outputDir;
803                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
804                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
805                 
806                 ofstream out;
807                 m->openOutputFile(outputFileName, out);
808                 
809                 ifstream in;
810                 m->openInputFile(countfile, in);
811                 
812                 bool wroteSomething = false;
813                 int selectedCount = 0;
814                 
815         string headers = m->getline(in); m->gobble(in);
816         vector<string> columnHeaders = m->splitWhiteSpace(headers);
817         
818         vector<string> groups;
819         map<int, string> originalGroupIndexes;
820         map<string, int> GroupIndexes;
821         set<int> indexOfGroupsChosen;
822         for (int i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  originalGroupIndexes[i-2] = columnHeaders[i]; }
823         //sort groups to keep consistent with how we store the groups in groupmap
824         sort(groups.begin(), groups.end());
825         for (int i = 0; i < groups.size(); i++) {  GroupIndexes[groups[i]] = i; }
826         sort(Groups.begin(), Groups.end());
827         out << "Representative_Sequence\ttotal\t";
828         for (int i = 0; i < Groups.size(); i++) { out << Groups[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[Groups[i]]); }
829         out << endl;
830         
831         string name; int oldTotal;
832         while (!in.eof()) {
833             
834             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
835             
836             in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
837             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
838             
839             if (names.count(name) != 0) {
840                 //if group info, then read it
841                 vector<int> selectedCounts; int thisTotal = 0; int temp;
842                 for (int i = 0; i < groups.size(); i++) {  
843                     int thisIndex = GroupIndexes[originalGroupIndexes[i]]; 
844                     in >> temp;  m->gobble(in);
845                     if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group
846                         selectedCounts.push_back(temp); thisTotal += temp;
847                     }
848                 }
849
850                 out << name << '\t' << thisTotal << '\t';
851                 for (int i = 0; i < selectedCounts.size(); i++) {  out << selectedCounts[i] << '\t'; }
852                 out << endl;
853                 
854                 wroteSomething = true;
855                 selectedCount+= thisTotal;
856             }else {  m->getline(in); }
857             
858             m->gobble(in);
859         }
860         in.close();
861                 out.close();
862                 
863                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
864                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
865                 
866                 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
867         
868                 return 0;
869         }
870         catch(exception& e) {
871                 m->errorOut(e, "GetGroupsCommand", "readCount");
872                 exit(1);
873         }
874 }
875 //**********************************************************************************************************************
876 int GetGroupsCommand::readDesign(){
877         try {
878                 string thisOutputDir = outputDir;
879                 if (outputDir == "") {  thisOutputDir += m->hasPath(designfile);  }
880                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
881                 
882                 ofstream out;
883                 m->openOutputFile(outputFileName, out);
884                 
885                 ifstream in;
886                 m->openInputFile(designfile, in);
887                 string name, group;
888                 
889                 bool wroteSomething = false;
890                 int selectedCount = 0;
891                 
892                 while(!in.eof()){
893                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
894                         
895                         in >> name;                             //read from first column
896                         in >> group;                    //read from second column
897                         
898                         //if this name is in the accnos file
899                         if (m->inUsersGroups(name, Groups)) {
900                                 wroteSomething = true;
901                                 out << name << '\t' << group << endl;
902                 selectedCount++;
903                         }
904                         
905                         m->gobble(in);
906                 }
907                 in.close();
908                 out.close();
909                 
910                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain groups from the groups you wish to get."); m->mothurOutEndLine();  }
911                 outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName);
912                 
913                 m->mothurOut("Selected " + toString(selectedCount) + " groups from your design file."); m->mothurOutEndLine();
914         
915                 
916                 return 0;
917         }
918         catch(exception& e) {
919                 m->errorOut(e, "GetGroupsCommand", "readDesign");
920                 exit(1);
921         }
922 }
923
924
925 //**********************************************************************************************************************
926 int GetGroupsCommand::readTax(){
927         try {
928                 string thisOutputDir = outputDir;
929                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
930                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
931                 ofstream out;
932                 m->openOutputFile(outputFileName, out);
933                 
934                 ifstream in;
935                 m->openInputFile(taxfile, in);
936                 string name, tax;
937                 
938                 bool wroteSomething = false;
939                 
940                 while(!in.eof()){
941                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
942                         
943                         in >> name;                             //read from first column
944                         in >> tax;                      //read from second column
945                         
946                         //if this name is in the accnos file
947                         if (names.count(name) != 0) {
948                                 wroteSomething = true;
949                                 out << name << '\t' << tax << endl;
950                         }else{
951                                 //if you are not in the accnos file check if you are a name that needs to be changed
952                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
953                                 if (it != uniqueToRedundant.end()) {
954                                         wroteSomething = true;
955                                         out << it->second << '\t' << tax << endl;
956                                 }
957                         }
958                         
959                         m->gobble(in);
960                 }
961                 in.close();
962                 out.close();
963                 
964                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
965                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
966                 
967                 return 0;
968         }
969         catch(exception& e) {
970                 m->errorOut(e, "GetGroupsCommand", "readTax");
971                 exit(1);
972         }
973 }
974 //**********************************************************************************************************************
975 int GetGroupsCommand::fillNames(){
976         try {
977                 vector<string> seqs = groupMap->getNamesSeqs();
978                 
979                 for (int i = 0; i < seqs.size(); i++) {
980                         
981                         if (m->control_pressed) { return 0; }
982                         
983                         string group = groupMap->getGroup(seqs[i]);
984                         
985                         if (m->inUsersGroups(group, Groups)) {
986                                 names.insert(seqs[i]);
987                         }
988                 }
989                 
990                 return 0;
991         }
992         catch(exception& e) {
993                 m->errorOut(e, "GetGroupsCommand", "fillNames");
994                 exit(1);
995         }
996 }
997
998 //**********************************************************************************************************************
999
1000