]> git.donarmstrong.com Git - mothur.git/blob - removegroupscommand.cpp
finished shhh.seqs command, fixed bug with remove.groups and get.groups that caused...
[mothur.git] / removegroupscommand.cpp
1 /*
2  *  removegroupscommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 11/10/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "removegroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
15
16 //**********************************************************************************************************************
17 vector<string> RemoveGroupsCommand::setParameters(){    
18         try {
19                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
20                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
21                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
22                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
23                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
24                 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
25                 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
26                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
27                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
29                 
30                 vector<string> myArray;
31                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
32                 return myArray;
33         }
34         catch(exception& e) {
35                 m->errorOut(e, "RemoveGroupsCommand", "setParameters");
36                 exit(1);
37         }
38 }
39 //**********************************************************************************************************************
40 string RemoveGroupsCommand::getHelpString(){    
41         try {
42                 string helpString = "";
43                 helpString += "The remove.groups command removes sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy or sharedfile.\n";
44                 helpString += "It outputs a file containing the sequences NOT in the those specified groups, or with a sharedfile eliminates the groups you selected.\n";
45                 helpString += "The remove.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared and groups. The group parameter is required, unless you have a current group file or are using a sharedfile.\n";
46                 helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
47                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed.  You can separate group names with dashes.\n";
48                 helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
49                 helpString += "Example remove.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
50                 helpString += "or remove.groups(groups=pasture, fasta=amazon.fasta, amazon.groups).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "RemoveGroupsCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61 RemoveGroupsCommand::RemoveGroupsCommand(){     
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["fasta"] = tempOutNames;
67                 outputTypes["taxonomy"] = tempOutNames;
68                 outputTypes["name"] = tempOutNames;
69                 outputTypes["group"] = tempOutNames;
70                 outputTypes["list"] = tempOutNames;
71                 outputTypes["shared"] = tempOutNames;
72         }
73         catch(exception& e) {
74                 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
75                 exit(1);
76         }
77 }
78 //**********************************************************************************************************************
79 RemoveGroupsCommand::RemoveGroupsCommand(string option)  {
80         try {
81                 abort = false; calledHelp = false;   
82                 
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
86                 
87                 else {
88                         vector<string> myArray = setParameters();
89                         
90                         OptionParser parser(option);
91                         map<string,string> parameters = parser.getParameters();
92                         
93                         ValidParameters validParameter;
94                         map<string,string>::iterator it;
95                         
96                         //check to make sure all parameters are valid for command
97                         for (it = parameters.begin(); it != parameters.end(); it++) { 
98                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
99                         }
100                         
101                         //initialize outputTypes
102                         vector<string> tempOutNames;
103                         outputTypes["fasta"] = tempOutNames;
104                         outputTypes["taxonomy"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["group"] = tempOutNames;
107                         outputTypes["list"] = tempOutNames;
108                         outputTypes["shared"] = tempOutNames;
109                         
110                         
111                         //if the user changes the output directory command factory will send this info to us in the output parameter 
112                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
113                         
114                         //if the user changes the input directory command factory will send this info to us in the output parameter 
115                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
116                         if (inputDir == "not found"){   inputDir = "";          }
117                         else {
118                                 string path;
119                                 it = parameters.find("fasta");
120                                 //user has given a template file
121                                 if(it != parameters.end()){ 
122                                         path = m->hasPath(it->second);
123                                         //if the user has not given a path then, add inputdir. else leave path alone.
124                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
125                                 }
126                                 
127                                 it = parameters.find("accnos");
128                                 //user has given a template file
129                                 if(it != parameters.end()){ 
130                                         path = m->hasPath(it->second);
131                                         //if the user has not given a path then, add inputdir. else leave path alone.
132                                         if (path == "") {       parameters["accnos"] = inputDir + it->second;           }
133                                 }
134                                 
135                                 it = parameters.find("list");
136                                 //user has given a template file
137                                 if(it != parameters.end()){ 
138                                         path = m->hasPath(it->second);
139                                         //if the user has not given a path then, add inputdir. else leave path alone.
140                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
141                                 }
142                                 
143                                 it = parameters.find("name");
144                                 //user has given a template file
145                                 if(it != parameters.end()){ 
146                                         path = m->hasPath(it->second);
147                                         //if the user has not given a path then, add inputdir. else leave path alone.
148                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
149                                 }
150                                 
151                                 it = parameters.find("group");
152                                 //user has given a template file
153                                 if(it != parameters.end()){ 
154                                         path = m->hasPath(it->second);
155                                         //if the user has not given a path then, add inputdir. else leave path alone.
156                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
157                                 }
158                                 
159                                 it = parameters.find("taxonomy");
160                                 //user has given a template file
161                                 if(it != parameters.end()){ 
162                                         path = m->hasPath(it->second);
163                                         //if the user has not given a path then, add inputdir. else leave path alone.
164                                         if (path == "") {       parameters["taxonomy"] = inputDir + it->second;         }
165                                 }
166                                 
167                                 it = parameters.find("shared");
168                                 //user has given a template file
169                                 if(it != parameters.end()){ 
170                                         path = m->hasPath(it->second);
171                                         //if the user has not given a path then, add inputdir. else leave path alone.
172                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
173                                 }
174                         }
175                         
176                         
177                         //check for required parameters
178                         accnosfile = validParameter.validFile(parameters, "accnos", true);
179                         if (accnosfile == "not open") { accnosfile = ""; abort = true; }
180                         else if (accnosfile == "not found") {  accnosfile = ""; }       
181                         else { m->setAccnosFile(accnosfile); }
182                         
183                         fastafile = validParameter.validFile(parameters, "fasta", true);
184                         if (fastafile == "not open") { fastafile = ""; abort = true; }
185                         else if (fastafile == "not found") {  fastafile = "";  }        
186                         else { m->setFastaFile(fastafile); }
187                         
188                         namefile = validParameter.validFile(parameters, "name", true);
189                         if (namefile == "not open") { namefile = ""; abort = true; }
190                         else if (namefile == "not found") {  namefile = "";  }  
191                         else { m->setNameFile(namefile); }
192                         
193                         groupfile = validParameter.validFile(parameters, "group", true);
194                         if (groupfile == "not open") { abort = true; }
195                         else if (groupfile == "not found") {            
196                                 //if there is a current group file, use it
197                                 groupfile = m->getGroupFile(); 
198                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
199                                 else {  m->mothurOut("You have no current groupfile and the group parameter is required."); m->mothurOutEndLine(); abort = true; }
200                         }else { m->setGroupFile(groupfile); }   
201                         
202                         listfile = validParameter.validFile(parameters, "list", true);
203                         if (listfile == "not open") { listfile = ""; abort = true; }
204                         else if (listfile == "not found") {  listfile = "";  }
205                         else { m->setListFile(listfile); }
206                         
207                         taxfile = validParameter.validFile(parameters, "taxonomy", true);
208                         if (taxfile == "not open") { taxfile = ""; abort = true; }
209                         else if (taxfile == "not found") {  taxfile = "";  }
210                         else { m->setTaxonomyFile(taxfile); }
211                         
212                         groups = validParameter.validFile(parameters, "groups", false);                 
213                         if (groups == "not found") { groups = ""; }
214                         else { 
215                                 m->splitAtDash(groups, Groups);
216                                 m->setGroups(Groups);
217                         }
218                         
219                         sharedfile = validParameter.validFile(parameters, "shared", true);
220                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }
221                         else if (sharedfile == "not found") {  sharedfile = "";  }
222                         else { m->setSharedFile(sharedfile); }
223                         
224                         groupfile = validParameter.validFile(parameters, "group", true);
225                         if (groupfile == "not open") { groupfile = ""; abort = true; }
226                         else if (groupfile == "not found") {    groupfile = ""; }
227                         else { m->setGroupFile(groupfile); }    
228                         
229                         if ((sharedfile == "") && (groupfile == "")) { 
230                                 //is there are current file available for any of these?
231                                 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
232                                         //give priority to group, then shared
233                                         groupfile = m->getGroupFile(); 
234                                         if (groupfile != "") {  m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
235                                         else { 
236                                                 sharedfile = m->getSharedFile(); 
237                                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
238                                                 else { 
239                                                         m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
240                                                 }
241                                         }
242                                 }else {
243                                         //give priority to shared, then group
244                                         sharedfile = m->getSharedFile(); 
245                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
246                                         else { 
247                                                 groupfile = m->getGroupFile(); 
248                                                 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
249                                                 else { 
250                                                         m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
251                                                 }
252                                         }
253                                 }
254                         }
255                         
256                         if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
257                         
258                         if ((fastafile == "") && (namefile == "") && (groupfile == "")  && (sharedfile == "") && (listfile == "") && (taxfile == ""))  { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
259                         if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")))  { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
260                 }
261                 
262         }
263         catch(exception& e) {
264                 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
265                 exit(1);
266         }
267 }
268 //**********************************************************************************************************************
269
270 int RemoveGroupsCommand::execute(){
271         try {
272                 
273                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
274                 
275                 //get groups you want to remove
276                 if (accnosfile != "") { readAccnos(); }
277                 
278                 if (groupfile != "") {
279                         groupMap = new GroupMap(groupfile);
280                         groupMap->readMap();
281                         
282                         //make sure groups are valid
283                         //takes care of user setting groupNames that are invalid or setting groups=all
284                         SharedUtil* util = new SharedUtil();
285                         vector<string> namesGroups = groupMap->getNamesOfGroups();
286                         util->setGroups(Groups, namesGroups);
287                         delete util;
288                         
289                         //fill names with names of sequences that are from the groups we want to remove 
290                         fillNames();
291                         
292                         delete groupMap;
293                 }
294                                 
295                 if (m->control_pressed) { return 0; }
296                 
297                 //read through the correct file and output lines you want to keep
298                 if (namefile != "")                     {               readName();             }
299                 if (fastafile != "")            {               readFasta();    }
300                 if (groupfile != "")            {               readGroup();    }
301                 if (listfile != "")                     {               readList();             }
302                 if (taxfile != "")                      {               readTax();              }
303                 if (sharedfile != "")           {               readShared();   }
304                 
305                 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } return 0; }
306                                 
307                 if (outputNames.size() != 0) {
308                         m->mothurOutEndLine();
309                         m->mothurOut("Output File names: "); m->mothurOutEndLine();
310                         for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
311                         m->mothurOutEndLine();
312                         
313                         //set fasta file as new current fastafile
314                         string current = "";
315                         itTypes = outputTypes.find("fasta");
316                         if (itTypes != outputTypes.end()) {
317                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
318                         }
319                         
320                         itTypes = outputTypes.find("name");
321                         if (itTypes != outputTypes.end()) {
322                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
323                         }
324                         
325                         itTypes = outputTypes.find("group");
326                         if (itTypes != outputTypes.end()) {
327                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
328                         }
329                         
330                         itTypes = outputTypes.find("list");
331                         if (itTypes != outputTypes.end()) {
332                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
333                         }
334                         
335                         itTypes = outputTypes.find("taxonomy");
336                         if (itTypes != outputTypes.end()) {
337                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
338                         }
339                         
340                         itTypes = outputTypes.find("shared");
341                         if (itTypes != outputTypes.end()) {
342                                 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
343                         }
344                 }
345                 
346                 return 0;               
347         }
348         
349         catch(exception& e) {
350                 m->errorOut(e, "RemoveGroupsCommand", "execute");
351                 exit(1);
352         }
353 }
354
355 //**********************************************************************************************************************
356 int RemoveGroupsCommand::readFasta(){
357         try {
358                 string thisOutputDir = outputDir;
359                 if (outputDir == "") {  thisOutputDir += m->hasPath(fastafile);  }
360                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
361                 
362                 ofstream out;
363                 m->openOutputFile(outputFileName, out);
364                 
365                 ifstream in;
366                 m->openInputFile(fastafile, in);
367                 string name;
368                 
369                 bool wroteSomething = false;
370                 int removedCount = 0;
371                 
372                 while(!in.eof()){
373                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
374                         
375                         Sequence currSeq(in);
376                         name = currSeq.getName();
377                         
378                         if (name != "") {
379                                 //if this name is in the accnos file
380                                 if (names.count(name) == 0) {
381                                         wroteSomething = true;
382                                         currSeq.printSequence(out); 
383                                 }else { 
384                                         //if you are not in the accnos file check if you are a name that needs to be changed
385                                         map<string, string>::iterator it = uniqueToRedundant.find(name);
386                                         if (it != uniqueToRedundant.end()) {
387                                                 wroteSomething = true;
388                                                 currSeq.setName(it->second);
389                                                 currSeq.printSequence(out);
390                                         }else { removedCount++; }
391                                 }
392                         }
393                         m->gobble(in);
394                 }
395                 in.close();     
396                 out.close();
397                 
398                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
399                 outputTypes["fasta"].push_back(outputFileName);  outputNames.push_back(outputFileName);
400                 
401                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
402                 
403                 return 0;
404                 
405         }
406         catch(exception& e) {
407                 m->errorOut(e, "RemoveGroupsCommand", "readFasta");
408                 exit(1);
409         }
410 }
411 //**********************************************************************************************************************
412 int RemoveGroupsCommand::readShared(){
413         try {
414                 string thisOutputDir = outputDir;
415                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
416                 
417                 //get group names from sharedfile so we can set Groups to the groupNames we want to keep
418                 //that way we can take advantage of the reads in inputdata and sharedRabundVector
419                 InputData* tempInput = new InputData(sharedfile, "sharedfile");
420                 vector<SharedRAbundVector*> lookup = tempInput->getSharedRAbundVectors();
421         
422                 //save m->Groups
423                 vector<string> allGroupsNames = m->getAllGroups();
424                 vector<string> mothurOutGroups = m->getGroups();
425                 
426                 vector<string> groupsToKeep;
427                 for (int i = 0; i < allGroupsNames.size(); i++) {
428                         if (!m->inUsersGroups(allGroupsNames[i], m->getGroups())) {
429                                 groupsToKeep.push_back(allGroupsNames[i]);
430                         }
431                 }
432                 
433                 if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove."); m->mothurOutEndLine(); m->setGroups(mothurOutGroups); delete tempInput; for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
434                 
435                 //reset read 
436                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
437                 delete tempInput;
438                 m->setGroups(groupsToKeep);
439                 m->clearAllGroups();
440                 m->names.clear();
441                 m->saveNextLabel = "";
442                 m->printedHeaders = false;
443                 m->currentBinLabels.clear();
444                 m->binLabelsInFile.clear();
445                 
446                 InputData input(sharedfile, "sharedfile");
447                 lookup = input.getSharedRAbundVectors();
448
449                 bool wroteSomething = false;
450                 
451                 while(lookup[0] != NULL) {
452                         
453                         string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".pick" + m->getExtension(sharedfile);
454                         ofstream out;
455                         m->openOutputFile(outputFileName, out);
456                         outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
457                         
458                         if (m->control_pressed) { out.close();  m->mothurRemove(outputFileName);  for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
459                         
460                         lookup[0]->printHeaders(out); 
461                         
462                         for (int i = 0; i < lookup.size(); i++) {
463                                 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
464                                 lookup[i]->print(out);
465                                 wroteSomething = true;
466                                 
467                         }                       
468                         
469                         //get next line to process
470                         //prevent memory leak
471                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } 
472                         lookup = input.getSharedRAbundVectors();
473                         
474                         out.close();
475                 }
476                 
477                 
478                 m->setGroups(mothurOutGroups);
479                 
480                 if (wroteSomething == false) {  m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine();  }
481                 
482                 string groupsString = "";
483                 for (int i = 0; i < Groups.size()-1; i++) {     groupsString += Groups[i] + ", "; }
484                 groupsString += Groups[Groups.size()-1];
485                 
486                 m->mothurOut("Removed groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
487                 
488                 return 0;
489                 
490         }
491         catch(exception& e) {
492                 m->errorOut(e, "RemoveGroupsCommand", "readShared");
493                 exit(1);
494         }
495 }
496 //**********************************************************************************************************************
497 int RemoveGroupsCommand::readList(){
498         try {
499                 string thisOutputDir = outputDir;
500                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
501                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" +  m->getExtension(listfile);
502                 
503                 ofstream out;
504                 m->openOutputFile(outputFileName, out);
505                 
506                 ifstream in;
507                 m->openInputFile(listfile, in);
508                 
509                 bool wroteSomething = false;
510                 int removedCount = 0;
511                 
512                 while(!in.eof()){
513                         
514                         removedCount = 0;
515                         
516                         //read in list vector
517                         ListVector list(in);
518                         
519                         //make a new list vector
520                         ListVector newList;
521                         newList.setLabel(list.getLabel());
522                         
523                         //for each bin
524                         for (int i = 0; i < list.getNumBins(); i++) {
525                                 if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
526                                 
527                                 //parse out names that are in accnos file
528                                 string binnames = list.get(i);
529                                 
530                                 string newNames = "";
531                                 while (binnames.find_first_of(',') != -1) { 
532                                         string name = binnames.substr(0,binnames.find_first_of(','));
533                                         binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
534                                         
535                                         //if that name is in the .accnos file, add it
536                                         if (names.count(name) == 0) {  newNames += name + ",";  }
537                                         else {
538                                                 //if you are not in the accnos file check if you are a name that needs to be changed
539                                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
540                                                 if (it != uniqueToRedundant.end()) {
541                                                         newNames += it->second + ",";
542                                                 }else { removedCount++; }
543                                         }
544                                 }
545                                 
546                                 //get last name
547                                 if (names.count(binnames) == 0) {  newNames += binnames + ",";  }
548                                 else { //if you are not in the accnos file check if you are a name that needs to be changed
549                                         map<string, string>::iterator it = uniqueToRedundant.find(binnames);
550                                         if (it != uniqueToRedundant.end()) {
551                                                 newNames += it->second + ",";
552                                         }else { removedCount++; }
553                                 }
554                                 
555                                 //if there are names in this bin add to new list
556                                 if (newNames != "") {  
557                                         newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
558                                         newList.push_back(newNames);    
559                                 }
560                         }
561                         
562                         //print new listvector
563                         if (newList.getNumBins() != 0) {
564                                 wroteSomething = true;
565                                 newList.print(out);
566                         }
567                         
568                         m->gobble(in);
569                 }
570                 in.close();     
571                 out.close();
572                 
573                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
574                 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
575                 
576                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
577                 
578                 return 0;
579                 
580         }
581         catch(exception& e) {
582                 m->errorOut(e, "RemoveGroupsCommand", "readList");
583                 exit(1);
584         }
585 }
586 //**********************************************************************************************************************
587 int RemoveGroupsCommand::readName(){
588         try {
589                 string thisOutputDir = outputDir;
590                 if (outputDir == "") {  thisOutputDir += m->hasPath(namefile);  }
591                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
592                 
593                 ofstream out;
594                 m->openOutputFile(outputFileName, out);
595                 
596                 ifstream in;
597                 m->openInputFile(namefile, in);
598                 string name, firstCol, secondCol;
599                 
600                 bool wroteSomething = false;
601                 int removedCount = 0;
602                 
603                 while(!in.eof()){
604                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
605                         
606                         in >> firstCol;         m->gobble(in);          
607                         in >> secondCol;                        
608                         
609                         vector<string> parsedNames;
610                         m->splitAtComma(secondCol, parsedNames);
611                                                 
612                         vector<string> validSecond;  validSecond.clear();
613                         for (int i = 0; i < parsedNames.size(); i++) {
614                                 if (names.count(parsedNames[i]) == 0) {
615                                         validSecond.push_back(parsedNames[i]);
616                                 }
617                         }
618                         
619                         removedCount += parsedNames.size()-validSecond.size();
620                         
621                         //if the name in the first column is in the set then print it and any other names in second column also in set
622                         if (names.count(firstCol) == 0) {
623                                 
624                                 wroteSomething = true;
625                                 
626                                 out << firstCol << '\t';
627                                 
628                                 //you know you have at least one valid second since first column is valid
629                                 for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
630                                 out << validSecond[validSecond.size()-1] << endl;
631                                 
632                                 //make first name in set you come to first column and then add the remaining names to second column
633                         }else {
634                                 
635                                 //you want part of this row
636                                 if (validSecond.size() != 0) {
637                                         
638                                         wroteSomething = true;
639                                         
640                                         out << validSecond[0] << '\t';
641                                         
642                                         //you know you have at least one valid second since first column is valid
643                                         for (int i = 0; i < validSecond.size()-1; i++) {  out << validSecond[i] << ',';  }
644                                         out << validSecond[validSecond.size()-1] << endl;
645                                         uniqueToRedundant[firstCol] = validSecond[0];
646                                 }
647                         }
648                         
649                         m->gobble(in);
650                 }
651                 in.close();
652                 out.close();
653                 
654                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
655                 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
656                 
657                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
658                 
659                 return 0;
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "RemoveGroupsCommand", "readName");
663                 exit(1);
664         }
665 }
666
667 //**********************************************************************************************************************
668 int RemoveGroupsCommand::readGroup(){
669         try {
670                 string thisOutputDir = outputDir;
671                 if (outputDir == "") {  thisOutputDir += m->hasPath(groupfile);  }
672                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
673                 
674                 ofstream out;
675                 m->openOutputFile(outputFileName, out);
676                 
677                 ifstream in;
678                 m->openInputFile(groupfile, in);
679                 string name, group;
680                 
681                 bool wroteSomething = false;
682                 int removedCount = 0;
683                 
684                 while(!in.eof()){
685                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
686                         
687                         in >> name;                             //read from first column
688                         in >> group;                    //read from second column
689                         
690                         //if this name is in the accnos file
691                         if (names.count(name) == 0) {
692                                 wroteSomething = true;
693                                 out << name << '\t' << group << endl;
694                         }else {  removedCount++;  }
695                         
696                         m->gobble(in);
697                 }
698                 in.close();
699                 out.close();
700                 
701                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
702                 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
703                 
704                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
705
706                 
707                 return 0;
708         }
709         catch(exception& e) {
710                 m->errorOut(e, "RemoveGroupsCommand", "readGroup");
711                 exit(1);
712         }
713 }
714 //**********************************************************************************************************************
715 int RemoveGroupsCommand::readTax(){
716         try {
717                 string thisOutputDir = outputDir;
718                 if (outputDir == "") {  thisOutputDir += m->hasPath(taxfile);  }
719                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
720                 ofstream out;
721                 m->openOutputFile(outputFileName, out);
722                 
723                 ifstream in;
724                 m->openInputFile(taxfile, in);
725                 string name, tax;
726                 
727                 bool wroteSomething = false;
728                 int removedCount = 0;
729                 
730                 while(!in.eof()){
731                         if (m->control_pressed) { in.close();  out.close();  m->mothurRemove(outputFileName);  return 0; }
732                         
733                         in >> name;                             //read from first column
734                         in >> tax;                      //read from second column
735                         
736                         //if this name is in the accnos file
737                         if (names.count(name) == 0) {
738                                 wroteSomething = true;
739                                 out << name << '\t' << tax << endl;
740                         }else {  //if you are not in the accnos file check if you are a name that needs to be changed
741                                 map<string, string>::iterator it = uniqueToRedundant.find(name);
742                                 if (it != uniqueToRedundant.end()) {
743                                         wroteSomething = true;
744                                         out << it->second << '\t' << tax << endl;
745                                 }else { removedCount++; }  }
746                         
747                         m->gobble(in);
748                 }
749                 in.close();
750                 out.close();
751                 
752                 if (wroteSomething == false) {  m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine();  }
753                 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
754                 
755                 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
756                 
757                 return 0;
758         }
759         catch(exception& e) {
760                 m->errorOut(e, "RemoveGroupsCommand", "readTax");
761                 exit(1);
762         }
763 }
764 //**********************************************************************************************************************
765 void RemoveGroupsCommand::readAccnos(){
766         try {
767                 Groups.clear();
768                 
769                 ifstream in;
770                 m->openInputFile(accnosfile, in);
771                 string name;
772                 
773                 while(!in.eof()){
774                         in >> name;
775                         
776                         Groups.push_back(name);
777                         
778                         m->gobble(in);
779                 }
780                 in.close();     
781                 
782                 m->setGroups(Groups);
783                 
784         }
785         catch(exception& e) {
786                 m->errorOut(e, "RemoveGroupsCommand", "readAccnos");
787                 exit(1);
788         }
789 }
790 //**********************************************************************************************************************
791 int RemoveGroupsCommand::fillNames(){
792         try {
793                 vector<string> seqs = groupMap->getNamesSeqs();
794                 
795                 for (int i = 0; i < seqs.size(); i++) {
796                         
797                         if (m->control_pressed) { return 0; }
798                         
799                         string group = groupMap->getGroup(seqs[i]);
800                         
801                         if (m->inUsersGroups(group, Groups)) {
802                                 names.insert(seqs[i]);
803                         }
804                 }
805                 
806                 return 0;
807         }
808         catch(exception& e) {
809                 m->errorOut(e, "RemoveGroupsCommand", "fillNames");
810                 exit(1);
811         }
812 }
813
814 //**********************************************************************************************************************
815
816
817