]> git.donarmstrong.com Git - mothur.git/blob - removegroupscommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / removegroupscommand.cpp
1 /*
2  *  removegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removegroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> RemoveGroupsCommand::setParameters(){    
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT","fasta",false,false,true); parameters.push_back(pfasta);
20                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false,true); parameters.push_back(pshared);
21         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false,true); parameters.push_back(pname);
22         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false,true); parameters.push_back(pcount);
23                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT","group",false,false,true); parameters.push_back(pgroup);            
24         CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT","design",false,false); parameters.push_back(pdesign);
25                 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT","list",false,false,true); parameters.push_back(plist);
26                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT","taxonomy",false,false,true); parameters.push_back(ptaxonomy);
27                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
28                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
29                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
31                 
32                 vector<string> myArray;
33                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
34                 return myArray;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "RemoveGroupsCommand", "setParameters");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 string RemoveGroupsCommand::getHelpString(){    
43         try {
44                 string helpString = "";
45                 helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n";
46                 helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n";
47                 helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n";
48                 helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
49                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed.  You can separate group names with dashes.\n";
50                 helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
51                 helpString += "Example remove.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
52                 helpString += "or remove.groups(groups=pasture, fasta=amazon.fasta, amazon.groups).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "RemoveGroupsCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61 //**********************************************************************************************************************
62 string RemoveGroupsCommand::getOutputPattern(string type) {
63     try {
64         string pattern = "";
65         
66         if (type == "fasta")            {   pattern = "[filename],pick,[extension]";    }
67         else if (type == "taxonomy")    {   pattern = "[filename],pick,[extension]";    }
68         else if (type == "name")        {   pattern = "[filename],pick,[extension]";    }
69         else if (type == "group")       {   pattern = "[filename],pick,[extension]";    }
70         else if (type == "count")       {   pattern = "[filename],pick,[extension]";    }
71         else if (type == "list")        {   pattern = "[filename],pick,[extension]";    }
72         else if (type == "shared")      {   pattern = "[filename],[tag],pick,[extension]";    }
73         else if (type == "design")      {   pattern = "[filename],pick,[extension]";    }
74         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
75         
76         return pattern;
77     }
78     catch(exception& e) {
79         m->errorOut(e, "RemoveGroupsCommand", "getOutputPattern");
80         exit(1);
81     }
82 }
83 //**********************************************************************************************************************
84 RemoveGroupsCommand::RemoveGroupsCommand(){     
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["fasta"] = tempOutNames;
90                 outputTypes["taxonomy"] = tempOutNames;
91                 outputTypes["name"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["list"] = tempOutNames;
94                 outputTypes["shared"] = tempOutNames;
95         outputTypes["design"] = tempOutNames;
96         outputTypes["count"] = tempOutNames;
97         }
98         catch(exception& e) {
99                 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
100                 exit(1);
101         }
102 }
103 //**********************************************************************************************************************
104 RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
105         try {
106                 abort = false; calledHelp = false;   
107                 
108                 //allow user to run help
109                 if(option == "help") { help(); abort = true; calledHelp = true; }
110                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
111                 
112                 else {
113                         vector<string> myArray = setParameters();
114                         
115                         OptionParser parser(option);
116                         map<string,string> parameters = parser.getParameters();
117                         
118                         ValidParameters validParameter;
119                         map<string,string>::iterator it;
120                         
121                         //check to make sure all parameters are valid for command
122                         for (it = parameters.begin(); it != parameters.end(); it++) { 
123                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
124                         }
125                         
126                         //initialize outputTypes
127                         vector<string> tempOutNames;
128                         outputTypes["fasta"] = tempOutNames;
129                         outputTypes["taxonomy"] = tempOutNames;
130                         outputTypes["name"] = tempOutNames;
131                         outputTypes["group"] = tempOutNames;
132                         outputTypes["list"] = tempOutNames;
133                         outputTypes["shared"] = tempOutNames;
134             outputTypes["design"] = tempOutNames;
135             outputTypes["count"] = tempOutNames;
136                         
137                         
138                         //if the user changes the output directory command factory will send this info to us in the output parameter 
139                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
140                         
141                         //if the user changes the input directory command factory will send this info to us in the output parameter 
142                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
143                         if (inputDir == "not found"){   inputDir = "";          }
144                         else {
145                                 string path;
146                                 it = parameters.find("fasta");
147                                 //user has given a template file
148                                 if(it != parameters.end()){ 
149                                         path = m->hasPath(it->second);
150                                         //if the user has not given a path then, add inputdir. else leave path alone.
151                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
152                                 }
153                                 
154                                 it = parameters.find("accnos");
155                                 //user has given a template file
156                                 if(it != parameters.end()){ 
157                                         path = m->hasPath(it->second);
158                                         //if the user has not given a path then, add inputdir. else leave path alone.
159                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
160                                 }
161                                 
162                                 it = parameters.find("list");
163                                 //user has given a template file
164                                 if(it != parameters.end()){ 
165                                         path = m->hasPath(it->second);
166                                         //if the user has not given a path then, add inputdir. else leave path alone.
167                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
168                                 }
169                                 
170                                 it = parameters.find("name");
171                                 //user has given a template file
172                                 if(it != parameters.end()){ 
173                                         path = m->hasPath(it->second);
174                                         //if the user has not given a path then, add inputdir. else leave path alone.
175                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
176                                 }
177                                 
178                                 it = parameters.find("group");
179                                 //user has given a template file
180                                 if(it != parameters.end()){ 
181                                         path = m->hasPath(it->second);
182                                         //if the user has not given a path then, add inputdir. else leave path alone.
183                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
184                                 }
185                                 
186                                 it = parameters.find("taxonomy");
187                                 //user has given a template file
188                                 if(it != parameters.end()){ 
189                                         path = m->hasPath(it->second);
190                                         //if the user has not given a path then, add inputdir. else leave path alone.
191                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
192                                 }
193                                 
194                                 it = parameters.find("shared");
195                                 //user has given a template file
196                                 if(it != parameters.end()){ 
197                                         path = m->hasPath(it->second);
198                                         //if the user has not given a path then, add inputdir. else leave path alone.
199                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
200                                 }
201                 
202                 it = parameters.find("design");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["design"] = inputDir + it->second;           }
208                                 }
209                 
210                 it = parameters.find("count");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
216                                 }
217                         }
218                         
219                         
220                         //check for required parameters
221                         accnosfile = validParameter.validFile(parameters, "accnos", true);
222                         if (accnosfile == "not open") { accnosfile = ""; abort = true; }
223                         else if (accnosfile == "not found") {  accnosfile = ""; }       
224                         else { m->setAccnosFile(accnosfile); }
225                         
226                         fastafile = validParameter.validFile(parameters, "fasta", true);
227                         if (fastafile == "not open") { fastafile = ""; abort = true; }
228                         else if (fastafile == "not found") {  fastafile = "";  }        
229                         else { m->setFastaFile(fastafile); }
230                         
231                         namefile = validParameter.validFile(parameters, "name", true);
232                         if (namefile == "not open") { namefile = ""; abort = true; }
233                         else if (namefile == "not found") {  namefile = "";  }  
234                         else { m->setNameFile(namefile); }
235                         
236                         groupfile = validParameter.validFile(parameters, "group", true);
237                         if (groupfile == "not open") { groupfile = "";  abort = true; }
238                         else if (groupfile == "not found") {    groupfile = "";         }
239                         else { m->setGroupFile(groupfile); }    
240                         
241                         listfile = validParameter.validFile(parameters, "list", true);
242                         if (listfile == "not open") { listfile = ""; abort = true; }
243                         else if (listfile == "not found") {  listfile = "";  }
244                         else { m->setListFile(listfile); }
245                         
246                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
247                         if (taxfile == "not open") { taxfile = ""; abort = true; }
248                         else if (taxfile == "not found") {  taxfile = "";  }
249                         else { m->setTaxonomyFile(taxfile); }
250             
251             designfile = validParameter.validFile(parameters, "design", true);
252                         if (designfile == "not open") { designfile = ""; abort = true; }
253                         else if (designfile == "not found") {  designfile = "";  }
254                         else { m->setDesignFile(designfile); }
255                         
256                         groups = validParameter.validFile(parameters, "groups", false);                 
257                         if (groups == "not found") { groups = ""; }
258                         else { 
259                                 m->splitAtDash(groups, Groups);
260                                 m->setGroups(Groups);
261                         }
262                         
263                         sharedfile = validParameter.validFile(parameters, "shared", true);
264                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
265                         else if (sharedfile == "not found") {  sharedfile = "";  }
266                         else { m->setSharedFile(sharedfile); }
267                         
268                         
269                         countfile = validParameter.validFile(parameters, "count", true);
270             if (countfile == "not open") { countfile = ""; abort = true; }
271             else if (countfile == "not found") { countfile = "";  }     
272             else { m->setCountTableFile(countfile); }
273             
274             if ((namefile != "") && (countfile != "")) {
275                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
276             }
277             
278             if ((groupfile != "") && (countfile != "")) {
279                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
280             }
281             
282                         
283                         if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) { 
284                                 //is there are current file available for any of these?
285                                 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
286                                         //give priority to group, then shared
287                                         groupfile = m->getGroupFile(); 
288                                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
289                                         else { 
290                                                 sharedfile = m->getSharedFile(); 
291                                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
292                                                 else { 
293                                                         countfile = m->getCountTableFile(); 
294                             if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
295                             else { 
296                                 m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
297                             }
298                                                 }
299                                         }
300                                 }else {
301                                         //give priority to shared, then group
302                                         sharedfile = m->getSharedFile(); 
303                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
304                                         else { 
305                                                 groupfile = m->getGroupFile(); 
306                                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
307                                                 else { 
308                                                         designfile = m->getDesignFile(); 
309                             if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
310                             else { 
311                                 countfile = m->getCountTableFile(); 
312                                 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
313                                 else { 
314                                     m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
315                                 }
316                                 
317                             }
318                                                 }
319                                         }
320                                 }
321                         }
322                         
323                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
324                         
325                         if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "")  && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
326                         if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
327             
328             if (countfile == "") {
329                 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
330                     vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
331                     parser.getNameFile(files);
332                 }
333             }
334                 }
335                 
336         }
337         catch(exception& e) {
338                 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
339                 exit(1);
340         }
341 }
342 //**********************************************************************************************************************
343
344 int RemoveGroupsCommand::execute(){
345         try {
346                 
347                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
348                 
349                 //get groups you want to remove
350                 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups);  }
351                 
352                 if (groupfile != "") {
353                         groupMap = new GroupMap(groupfile);
354                         groupMap->readMap();
355                         
356                         //make sure groups are valid
357                         //takes care of user setting groupNames that are invalid or setting groups=all
358                         SharedUtil* util = new SharedUtil();
359                         vector<string> namesGroups = groupMap->getNamesOfGroups();
360                         util->setGroups(Groups, namesGroups);
361                         delete util;
362                         
363                         //fill names with names of sequences that are from the groups we want to remove 
364                         fillNames();
365                         
366                         delete groupMap;
367                 }else if (countfile != ""){
368             if ((fastafile != "") || (listfile != "") || (taxfile != "")) { 
369                 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
370             }
371             CountTable ct;
372             ct.readTable(countfile);
373             if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
374             
375             vector<string> gNamesOfGroups = ct.getNamesOfGroups();
376             SharedUtil util;
377             util.setGroups(Groups, gNamesOfGroups);
378             vector<string> namesOfSeqs = ct.getNamesOfSeqs();
379             sort(Groups.begin(), Groups.end());
380             
381             for (int i = 0; i < namesOfSeqs.size(); i++) {
382                 vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
383                 if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
384                     names.insert(namesOfSeqs[i]);
385                 }
386             }
387         }
388
389                                 
390                 if (m->control_pressed) { return 0; }
391                 
392                 //read through the correct file and output lines you want to keep
393                 if (namefile != "")                     {               readName();             }
394                 if (fastafile != "")            {               readFasta();    }
395                 if (groupfile != "")            {               readGroup();    }
396         if (countfile != "")            {               readCount();    }
397                 if (listfile != "")                     {               readList();             }
398                 if (taxfile != "")                      {               readTax();              }
399                 if (sharedfile != "")           {               readShared();   }
400         if (designfile != "")           {               readDesign();   }
401                 
402                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
403                                 
404                 if (outputNames.size() != 0) {
405                         m->mothurOutEndLine();
406                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
407                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
408                         m->mothurOutEndLine();
409                         
410                         //set fasta file as new current fastafile
411                         string current = "";
412                         itTypes = outputTypes.find("fasta");
413                         if (itTypes != outputTypes.end()) {
414                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
415                         }
416                         
417                         itTypes = outputTypes.find("name");
418                         if (itTypes != outputTypes.end()) {
419                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
420                         }
421                         
422                         itTypes = outputTypes.find("group");
423                         if (itTypes != outputTypes.end()) {
424                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
425                         }
426                         
427                         itTypes = outputTypes.find("list");
428                         if (itTypes != outputTypes.end()) {
429                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
430                         }
431                         
432                         itTypes = outputTypes.find("taxonomy");
433                         if (itTypes != outputTypes.end()) {
434                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
435                         }
436                         
437                         itTypes = outputTypes.find("shared");
438                         if (itTypes != outputTypes.end()) {
439                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
440                         }
441             
442             itTypes = outputTypes.find("design");
443                         if (itTypes != outputTypes.end()) {
444                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
445                         }
446             
447             itTypes = outputTypes.find("count");
448                         if (itTypes != outputTypes.end()) {
449                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
450                         }
451                 }
452                 
453                 return 0;               
454         }
455         
456         catch(exception& e) {
457                 m->errorOut(e, "RemoveGroupsCommand", "execute");
458                 exit(1);
459         }
460 }
461
462 //**********************************************************************************************************************
463 int RemoveGroupsCommand::readFasta(){
464         try {
465                 string thisOutputDir = outputDir;
466                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
467         map<string, string> variables; 
468         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
469         variables["[extension]"] = m->getExtension(fastafile);
470                 string outputFileName = getOutputFileName("fasta", variables);
471                 
472                 ofstream out;
473                 m->openOutputFile(outputFileName, out);
474                 
475                 ifstream in;
476                 m->openInputFile(fastafile, in);
477                 string name;
478                 
479                 bool wroteSomething = false;
480                 int removedCount = 0;
481                 
482                 while(!in.eof()){
483                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
484                         
485                         Sequence currSeq(in);
486                         name = currSeq.getName();
487                         
488                         if (name != "") {
489                                 //if this name is in the accnos file
490                                 if (names.count(name) == 0) {
491                                         wroteSomething = true;
492                                         currSeq.printSequence(out); 
493                                 }else { 
494                                         //if you are not in the accnos file check if you are a name that needs to be changed
495                                         map<string, string>::iterator it = uniqueToRedundant.find(name);
496                                         if (it != uniqueToRedundant.end()) {
497                                                 wroteSomething = true;
498                                                 currSeq.setName(it->second);
499                                                 currSeq.printSequence(out);
500                                         }else { removedCount++; }
501                                 }
502                         }
503                         m->gobble(in);
504                 }
505                 in.close();     
506                 out.close();
507                 
508                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
509                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
510                 
511                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
512                 
513                 return 0;
514                 
515         }
516         catch(exception& e) {
517                 m->errorOut(e, "RemoveGroupsCommand", "readFasta");
518                 exit(1);
519         }
520 }
521 //**********************************************************************************************************************
522 int RemoveGroupsCommand::readShared(){
523         try {
524                 string thisOutputDir = outputDir;
525                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
526                 
527                 //get group names from sharedfile so we can set Groups to the groupNames we want to keep
528                 //that way we can take advantage of the reads in inputdata and sharedRabundVector
529                 InputData* tempInput = new InputData(sharedfile, "sharedfile");
530                 vector<SharedRAbundVector*> lookup = tempInput->getSharedRAbundVectors();
531         
532         map<string, string> variables; 
533         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
534         variables["[extension]"] = m->getExtension(sharedfile);
535         
536                 //save m->Groups
537                 vector<string> allGroupsNames = m->getAllGroups();
538                 vector<string> mothurOutGroups = m->getGroups();
539                 
540                 vector<string> groupsToKeep;
541                 for (int i = 0; i < allGroupsNames.size(); i++) {
542                         if (!m->inUsersGroups(allGroupsNames[i], m->getGroups())) {
543                                 groupsToKeep.push_back(allGroupsNames[i]);
544                         }
545                 }
546                 
547                 if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove."); m->mothurOutEndLine(); m->setGroups(mothurOutGroups); delete tempInput; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
548                 
549                 //reset read 
550                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
551                 delete tempInput;
552                 m->setGroups(groupsToKeep);
553                 m->clearAllGroups();
554                 m->saveNextLabel = "";
555                 m->printedHeaders = false;
556                 m->currentBinLabels.clear();
557                 m->binLabelsInFile.clear();
558                 
559                 InputData input(sharedfile, "sharedfile");
560                 lookup = input.getSharedRAbundVectors();
561
562                 bool wroteSomething = false;
563                 
564                 while(lookup[0] != NULL) {
565                         
566                         variables["[tag]"] = lookup[0]->getLabel();
567             string outputFileName = getOutputFileName("shared", variables);
568                         ofstream out;
569                         m->openOutputFile(outputFileName, out);
570                         outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
571                         
572                         if (m->control_pressed) { out.close();  m->mothurRemove(outputFileName);  for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
573                         
574                         lookup[0]->printHeaders(out); 
575                         
576                         for (int i = 0; i < lookup.size(); i++) {
577                                 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
578                                 lookup[i]->print(out);
579                                 wroteSomething = true;
580                                 
581                         }                       
582                         
583                         //get next line to process
584                         //prevent memory leak
585                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
586                         lookup = input.getSharedRAbundVectors();
587                         
588                         out.close();
589                 }
590                 
591                 
592                 m->setGroups(mothurOutGroups);
593                 
594                 if (wroteSomething == false) {  m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine();  }
595                 
596                 string groupsString = "";
597                 for (int i = 0; i < Groups.size()-1; i++) {     groupsString += Groups[i] + ", "; }
598                 groupsString += Groups[Groups.size()-1];
599                 
600                 m->mothurOut("Removed groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
601                 
602                 return 0;
603                 
604         }
605         catch(exception& e) {
606                 m->errorOut(e, "RemoveGroupsCommand", "readShared");
607                 exit(1);
608         }
609 }
610 //**********************************************************************************************************************
611 int RemoveGroupsCommand::readList(){
612         try {
613                 string thisOutputDir = outputDir;
614                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
615                 map<string, string> variables; 
616         variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
617         variables["[extension]"] = m->getExtension(listfile);
618                 string outputFileName = getOutputFileName("list", variables);
619
620                 
621                 ofstream out;
622                 m->openOutputFile(outputFileName, out);
623                 
624                 ifstream in;
625                 m->openInputFile(listfile, in);
626                 
627                 bool wroteSomething = false;
628                 int removedCount = 0;
629                 
630                 while(!in.eof()){
631                         
632                         removedCount = 0;
633                         
634                         //read in list vector
635                         ListVector list(in);
636                         
637                         //make a new list vector
638                         ListVector newList;
639                         newList.setLabel(list.getLabel());
640                         
641                         //for each bin
642                         for (int i = 0; i < list.getNumBins(); i++) {
643                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
644                                 
645                                 //parse out names that are in accnos file
646                                 string binnames = list.get(i);
647                                 
648                                 string newNames = "";
649                                 while (binnames.find_first_of(',') != -1) { 
650                                         string name = binnames.substr(0,binnames.find_first_of(','));
651                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
652                                         
653                                         //if that name is in the .accnos file, add it
654                                         if (names.count(name) == 0) {  newNames += name + ",";  }
655                                         else {
656                                                 //if you are not in the accnos file check if you are a name that needs to be changed
657                                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
658                                                 if (it != uniqueToRedundant.end()) {
659                                                         newNames += it->second + ",";
660                                                 }else { removedCount++; }
661                                         }
662                                 }
663                                 
664                                 //get last name
665                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
666                                 else { //if you are not in the accnos file check if you are a name that needs to be changed
667                                         map<string, string>::iterator it = uniqueToRedundant.find(binnames);
668                                         if (it != uniqueToRedundant.end()) {
669                                                 newNames += it->second + ",";
670                                         }else { removedCount++; }
671                                 }
672                                 
673                                 //if there are names in this bin add to new list
674                                 if (newNames != "") {  
675                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
676                                         newList.push_back(newNames);    
677                                 }
678                         }
679                         
680                         //print new listvector
681                         if (newList.getNumBins() != 0) {
682                                 wroteSomething = true;
683                                 newList.print(out);
684                         }
685                         
686                         m->gobble(in);
687                 }
688                 in.close();     
689                 out.close();
690                 
691                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
692                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
693                 
694                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
695                 
696                 return 0;
697                 
698         }
699         catch(exception& e) {
700                 m->errorOut(e, "RemoveGroupsCommand", "readList");
701                 exit(1);
702         }
703 }
704 //**********************************************************************************************************************
705 int RemoveGroupsCommand::readName(){
706         try {
707                 string thisOutputDir = outputDir;
708                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
709                 map<string, string> variables; 
710                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
711         variables["[extension]"] = m->getExtension(namefile);
712                 string outputFileName = getOutputFileName("name", variables);   
713                 ofstream out;
714                 m->openOutputFile(outputFileName, out);
715                 
716                 ifstream in;
717                 m->openInputFile(namefile, in);
718                 string name, firstCol, secondCol;
719                 
720                 bool wroteSomething = false;
721                 int removedCount = 0;
722                 
723                 while(!in.eof()){
724                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
725                         
726                         in >> firstCol;         m->gobble(in);          
727                         in >> secondCol;                        
728                         
729                         vector<string> parsedNames;
730                         m->splitAtComma(secondCol, parsedNames);
731                                                 
732                         vector<string> validSecond;  validSecond.clear();
733                         for (int i = 0; i < parsedNames.size(); i++) {
734                                 if (names.count(parsedNames[i]) == 0) {
735                                         validSecond.push_back(parsedNames[i]);
736                                 }
737                         }
738                         
739                         removedCount += parsedNames.size()-validSecond.size();
740                         
741                         //if the name in the first column is in the set then print it and any other names in second column also in set
742                         if (names.count(firstCol) == 0) {
743                                 
744                                 wroteSomething = true;
745                                 
746                                 out << firstCol << '\t';
747                                 
748                                 //you know you have at least one valid second since first column is valid
749                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
750                                 out << validSecond[validSecond.size()-1] << endl;
751                                 
752                                 //make first name in set you come to first column and then add the remaining names to second column
753                         }else {
754                                 
755                                 //you want part of this row
756                                 if (validSecond.size() != 0) {
757                                         
758                                         wroteSomething = true;
759                                         
760                                         out << validSecond[0] << '\t';
761                                         
762                                         //you know you have at least one valid second since first column is valid
763                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
764                                         out << validSecond[validSecond.size()-1] << endl;
765                                         uniqueToRedundant[firstCol] = validSecond[0];
766                                 }
767                         }
768                         
769                         m->gobble(in);
770                 }
771                 in.close();
772                 out.close();
773                 
774                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
775                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
776                 
777                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
778                 
779                 return 0;
780         }
781         catch(exception& e) {
782                 m->errorOut(e, "RemoveGroupsCommand", "readName");
783                 exit(1);
784         }
785 }
786
787 //**********************************************************************************************************************
788 int RemoveGroupsCommand::readGroup(){
789         try {
790                 string thisOutputDir = outputDir;
791                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
792         map<string, string> variables; 
793                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
794         variables["[extension]"] = m->getExtension(groupfile);
795                 string outputFileName = getOutputFileName("group", variables);  
796                 ofstream out;
797                 m->openOutputFile(outputFileName, out);
798                 
799                 ifstream in;
800                 m->openInputFile(groupfile, in);
801                 string name, group;
802                 
803                 bool wroteSomething = false;
804                 int removedCount = 0;
805                 
806                 while(!in.eof()){
807                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
808                         
809                         in >> name;                             //read from first column
810                         in >> group;                    //read from second column
811                         
812                         //if this name is in the accnos file
813                         if (names.count(name) == 0) {
814                                 wroteSomething = true;
815                                 out << name << '\t' << group << endl;
816                         }else {  removedCount++;  }
817                         
818                         m->gobble(in);
819                 }
820                 in.close();
821                 out.close();
822                 
823                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
824                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
825                 
826                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
827
828                 
829                 return 0;
830         }
831         catch(exception& e) {
832                 m->errorOut(e, "RemoveGroupsCommand", "readGroup");
833                 exit(1);
834         }
835 }
836 //**********************************************************************************************************************
837 int RemoveGroupsCommand::readCount(){
838         try {
839                 string thisOutputDir = outputDir;
840                 if (outputDir == "") {  thisOutputDir += m->hasPath(countfile);  }
841                 map<string, string> variables; 
842                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
843         variables["[extension]"] = m->getExtension(countfile);
844                 string outputFileName = getOutputFileName("count", variables);
845                 
846                 ofstream out;
847                 m->openOutputFile(outputFileName, out);
848                 
849                 ifstream in;
850                 m->openInputFile(countfile, in);
851                 
852                 bool wroteSomething = false;
853                 int removedCount = 0;
854                 
855         string headers = m->getline(in); m->gobble(in);
856         vector<string> columnHeaders = m->splitWhiteSpace(headers);
857         
858         vector<string> groups;
859         map<int, string> originalGroupIndexes;
860         map<string, int> GroupIndexes;
861         set<int> indexOfGroupsChosen;
862         for (int i = 2; i < columnHeaders.size(); i++) {  groups.push_back(columnHeaders[i]);  originalGroupIndexes[i-2] = columnHeaders[i]; }
863         //sort groups to keep consistent with how we store the groups in groupmap
864         sort(groups.begin(), groups.end());
865         for (int i = 0; i < groups.size(); i++) {  GroupIndexes[groups[i]] = i; }
866
867                 vector<string> groupsToKeep;
868                 for (int i = 0; i < groups.size(); i++) {
869                         if (!m->inUsersGroups(groups[i], Groups)) { groupsToKeep.push_back(groups[i]); }
870                 }
871         sort(groupsToKeep.begin(), groupsToKeep.end());
872         out << "Representative_Sequence\ttotal\t";
873         for (int i = 0; i < groupsToKeep.size(); i++) { out << groupsToKeep[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[groupsToKeep[i]]); }
874         out << endl;
875         
876         string name; int oldTotal;
877         while (!in.eof()) {
878             
879             if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
880             
881             in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
882             if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
883             
884             if (names.count(name) == 0) {
885                 //if group info, then read it
886                 vector<int> selectedCounts; int thisTotal = 0; int temp;
887                 for (int i = 0; i < groups.size(); i++) {  
888                     int thisIndex = GroupIndexes[originalGroupIndexes[i]]; 
889                     in >> temp;  m->gobble(in);
890                     if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group
891                         selectedCounts.push_back(temp); thisTotal += temp;
892                     }
893                 }
894                 
895                 out << name << '\t' << thisTotal << '\t';
896                 for (int i = 0; i < selectedCounts.size(); i++) {  out << selectedCounts[i] << '\t'; }
897                 out << endl;
898                 
899                 wroteSomething = true;
900                 removedCount+= (oldTotal - thisTotal);
901             }else {  m->getline(in); removedCount += oldTotal; }
902             
903             m->gobble(in);
904         }
905         in.close();
906                 out.close();
907                 
908                 if (wroteSomething == false) {  m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine();  }
909                 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
910                 
911                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
912         
913                 return 0;
914         }
915         catch(exception& e) {
916                 m->errorOut(e, "RemoveGroupsCommand", "readCount");
917                 exit(1);
918         }
919 }
920 //**********************************************************************************************************************
921 int RemoveGroupsCommand::readDesign(){
922         try {
923                 string thisOutputDir = outputDir;
924                 if (outputDir == "") {  thisOutputDir += m->hasPath(designfile);  }
925         map<string, string> variables; 
926                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(designfile));
927         variables["[extension]"] = m->getExtension(designfile);
928                 string outputFileName = getOutputFileName("design", variables);
929                 
930                 ofstream out;
931                 m->openOutputFile(outputFileName, out);
932                 
933                 ifstream in;
934                 m->openInputFile(designfile, in);
935                 string name, group;
936                 
937                 bool wroteSomething = false;
938                 int removedCount = 0;
939                 
940                 while(!in.eof()){
941                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
942                         
943                         in >> name;                             //read from first column
944                         in >> group;                    //read from second column
945                         
946                         //if this name is in the accnos file
947                         if (!(m->inUsersGroups(name, Groups))) {
948                                 wroteSomething = true;
949                                 out << name << '\t' << group << endl;
950                         }else {  removedCount++;  }
951                         
952                         m->gobble(in);
953                 }
954                 in.close();
955                 out.close();
956                 
957                 if (wroteSomething == false) {  m->mothurOut("Your file contains only groups from the groups you wish to remove."); m->mothurOutEndLine();  }
958                 outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName);
959                 
960                 m->mothurOut("Removed " + toString(removedCount) + " groups from your design file."); m->mothurOutEndLine();
961         
962                 
963                 return 0;
964         }
965         catch(exception& e) {
966                 m->errorOut(e, "RemoveGroupsCommand", "readDesign");
967                 exit(1);
968         }
969 }
970
971 //**********************************************************************************************************************
972 int RemoveGroupsCommand::readTax(){
973         try {
974                 string thisOutputDir = outputDir;
975                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
976                 map<string, string> variables; 
977                 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
978         variables["[extension]"] = m->getExtension(taxfile);
979                 string outputFileName = getOutputFileName("taxonomy", variables);
980                 ofstream out;
981                 m->openOutputFile(outputFileName, out);
982                 
983                 ifstream in;
984                 m->openInputFile(taxfile, in);
985                 string name, tax;
986                 
987                 bool wroteSomething = false;
988                 int removedCount = 0;
989                 
990                 while(!in.eof()){
991                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
992                         
993                         in >> name;                             //read from first column
994                         in >> tax;                      //read from second column
995                         
996                         //if this name is in the accnos file
997                         if (names.count(name) == 0) {
998                                 wroteSomething = true;
999                                 out << name << '\t' << tax << endl;
1000                         }else {  //if you are not in the accnos file check if you are a name that needs to be changed
1001                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
1002                                 if (it != uniqueToRedundant.end()) {
1003                                         wroteSomething = true;
1004                                         out << it->second << '\t' << tax << endl;
1005                                 }else { removedCount++; }  }
1006                         
1007                         m->gobble(in);
1008                 }
1009                 in.close();
1010                 out.close();
1011                 
1012                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
1013                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
1014                 
1015                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
1016                 
1017                 return 0;
1018         }
1019         catch(exception& e) {
1020                 m->errorOut(e, "RemoveGroupsCommand", "readTax");
1021                 exit(1);
1022         }
1023 }
1024 //**********************************************************************************************************************
1025 int RemoveGroupsCommand::fillNames(){
1026         try {
1027                 vector<string> seqs = groupMap->getNamesSeqs();
1028                 
1029                 for (int i = 0; i < seqs.size(); i++) {
1030                         
1031                         if (m->control_pressed) { return 0; }
1032                         
1033                         string group = groupMap->getGroup(seqs[i]);
1034                         
1035                         if (m->inUsersGroups(group, Groups)) {
1036                                 names.insert(seqs[i]);
1037                         }
1038                 }
1039                 
1040                 return 0;
1041         }
1042         catch(exception& e) {
1043                 m->errorOut(e, "RemoveGroupsCommand", "fillNames");
1044                 exit(1);
1045         }
1046 }
1047
1048 //**********************************************************************************************************************
1049
1050
1051