2 * removegroupscommand.cpp
5 * Created by westcott on 11/10/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "removegroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
// Builds the list of parameters this command accepts.
// Each CommandParameter constructed below is appended to the `parameters` member, and the
// names of all registered parameters are then copied into `myArray`.
// NOTE(review): the return type is vector<string>, so `myArray` is presumably what is
// returned -- the return statement is not visible in this listing; confirm.
17 vector<string> RemoveGroupsCommand::setParameters(){
// Input-file parameters (all declared with the "InputTypes" type string).
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(pfasta);
20 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none",false,false); parameters.push_back(pshared);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none",false,false); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT",false,false); parameters.push_back(pgroup); CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT",false,false); parameters.push_back(pdesign);
24 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(plist);
25 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT",false,false); parameters.push_back(ptaxonomy);
26 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
// Plain string parameters: the groups to remove and the input/output directories.
27 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
28 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
29 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect the name of every parameter registered so far.
31 vector<string> myArray;
32 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exception path: report the error tagged with this class and function name.
36 m->errorOut(e, "RemoveGroupsCommand", "setParameters");
40 //**********************************************************************************************************************
// Assembles the help text for the remove.groups command: what the command does, which
// parameters it accepts, and example invocations. The text is accumulated in `helpString`.
// NOTE(review): `helpString` is presumably returned to the caller -- the return statement
// is not visible in this listing; confirm.
41 string RemoveGroupsCommand::getHelpString(){
43 string helpString = "";
// Purpose of the command and what ends up in the output.
44 helpString += "The remove.groups command removes sequences from a specific group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or sharedfile.\n";
45 helpString += "It outputs a file containing the sequences NOT in those specified groups, or with a sharedfile eliminates the groups you selected.\n";
// Parameter list; "count" is included because setParameters() registers it.
46 helpString += "The remove.groups command parameters are accnos, fasta, name, group, count, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file or are using a sharedfile.\n";
47 helpString += "You must also provide an accnos containing the list of groups to remove or set the groups parameter to the groups you wish to remove.\n";
48 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like removed. You can separate group names with dashes.\n";
// Usage examples; every argument is written as label=value.
49 helpString += "The remove.groups command should be in the following format: remove.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
50 helpString += "Example remove.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
51 helpString += "or remove.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
52 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e. yourFasta).\n";
// Exception path: report the error tagged with this class and function name.
56 m->errorOut(e, "RemoveGroupsCommand", "getHelpString");
60 //**********************************************************************************************************************
// Returns the tag that is appended to an output file's root name for a given output type.
//   type      - one of the output types this command registers in `outputTypes`.
//   inputName - the input file name; its extension (via m->getExtension) is reused for
//               every type except "count".
// Result: "pick.count_table" for "count", otherwise "pick" followed by the input file's
// extension. An unrecognised type leaves the result empty, logs an error and sets
// m->control_pressed.
61 string RemoveGroupsCommand::getOutputFileNameTag(string type, string inputName=""){
63 string outputFileName = "";
64 map<string, vector<string> >::iterator it;
66 //is this a type this command creates
67 it = outputTypes.find(type);
// A type that is not registered in outputTypes is only reported here.
68 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// Map the type to its tag; all file types except "count" keep the input file's extension.
70 if (type == "fasta") { outputFileName = "pick" + m->getExtension(inputName); }
71 else if (type == "taxonomy") { outputFileName = "pick" + m->getExtension(inputName); }
72 else if (type == "name") { outputFileName = "pick" + m->getExtension(inputName); }
73 else if (type == "group") { outputFileName = "pick" + m->getExtension(inputName); }
74 else if (type == "count") { outputFileName = "pick.count_table"; }
75 else if (type == "list") { outputFileName = "pick" + m->getExtension(inputName); }
76 else if (type == "shared") { outputFileName = "pick" + m->getExtension(inputName); }
77 else if (type == "design") { outputFileName = "pick" + m->getExtension(inputName); }
// No tag defined for this type: log it and raise the control_pressed flag.
78 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
80 return outputFileName;
// Exception path: report the error tagged with this class and function name.
83 m->errorOut(e, "RemoveGroupsCommand", "getOutputFileNameTag");
87 //**********************************************************************************************************************
// Default constructor. Marks the command as aborted with help "called" (so execute()
// returns immediately with 0) and registers an empty output-file list for each of the
// eight output types this command can produce.
88 RemoveGroupsCommand::RemoveGroupsCommand(){
90 abort = true; calledHelp = true;
// One shared empty vector is copied into the map for every output type.
92 vector<string> tempOutNames;
93 outputTypes["fasta"] = tempOutNames;
94 outputTypes["taxonomy"] = tempOutNames;
95 outputTypes["name"] = tempOutNames;
96 outputTypes["group"] = tempOutNames;
97 outputTypes["list"] = tempOutNames;
98 outputTypes["shared"] = tempOutNames;
99 outputTypes["design"] = tempOutNames;
100 outputTypes["count"] = tempOutNames;
// Exception path: report the error tagged with this class and function name.
102 catch(exception& e) {
103 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
107 //**********************************************************************************************************************
// Constructs the command from the user's option string. It validates the parameter names,
// resolves the input files, falls back to mothur's "current" files when no file carrying
// group information was supplied, and sets `abort` whenever the inputs are unusable.
// NOTE(review): several lines of this function (else-branches, closing braces, the
// declaration of `path`) are not visible in this listing; the comments below describe
// only the visible statements.
108 RemoveGroupsCommand::RemoveGroupsCommand(string option) {
110 abort = false; calledHelp = false;
112 //allow user to run help
113 if(option == "help") { help(); abort = true; calledHelp = true; }
114 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Parse the option string into name/value pairs; the valid names come from setParameters().
117 vector<string> myArray = setParameters();
119 OptionParser parser(option);
120 map<string,string> parameters = parser.getParameters();
122 ValidParameters validParameter;
123 map<string,string>::iterator it;
125 //check to make sure all parameters are valid for command
126 for (it = parameters.begin(); it != parameters.end(); it++) {
127 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
// Register an empty output-file list for every output type this command can create.
130 //initialize outputTypes
131 vector<string> tempOutNames;
132 outputTypes["fasta"] = tempOutNames;
133 outputTypes["taxonomy"] = tempOutNames;
134 outputTypes["name"] = tempOutNames;
135 outputTypes["group"] = tempOutNames;
136 outputTypes["list"] = tempOutNames;
137 outputTypes["shared"] = tempOutNames;
138 outputTypes["design"] = tempOutNames;
139 outputTypes["count"] = tempOutNames;
// For both directories a "not found" result from validFile is mapped to the empty string.
142 //if the user changes the output directory command factory will send this info to us in the output parameter
143 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
145 //if the user changes the input directory command factory will send this info to us in the output parameter
146 string inputDir = validParameter.validFile(parameters, "inputdir", false);
147 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter the user supplied: when its value has no path component
// (m->hasPath returns ""), the value is prefixed with inputDir.
150 it = parameters.find("fasta");
151 //user has given a template file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["fasta"] = inputDir + it->second; }
158 it = parameters.find("accnos");
159 //user has given a template file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["accnos"] = inputDir + it->second; }
166 it = parameters.find("list");
167 //user has given a template file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["list"] = inputDir + it->second; }
174 it = parameters.find("name");
175 //user has given a template file
176 if(it != parameters.end()){
177 path = m->hasPath(it->second);
178 //if the user has not given a path then, add inputdir. else leave path alone.
179 if (path == "") { parameters["name"] = inputDir + it->second; }
182 it = parameters.find("group");
183 //user has given a template file
184 if(it != parameters.end()){
185 path = m->hasPath(it->second);
186 //if the user has not given a path then, add inputdir. else leave path alone.
187 if (path == "") { parameters["group"] = inputDir + it->second; }
190 it = parameters.find("taxonomy");
191 //user has given a template file
192 if(it != parameters.end()){
193 path = m->hasPath(it->second);
194 //if the user has not given a path then, add inputdir. else leave path alone.
195 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
198 it = parameters.find("shared");
199 //user has given a template file
200 if(it != parameters.end()){
201 path = m->hasPath(it->second);
202 //if the user has not given a path then, add inputdir. else leave path alone.
203 if (path == "") { parameters["shared"] = inputDir + it->second; }
206 it = parameters.find("design");
207 //user has given a template file
208 if(it != parameters.end()){
209 path = m->hasPath(it->second);
210 //if the user has not given a path then, add inputdir. else leave path alone.
211 if (path == "") { parameters["design"] = inputDir + it->second; }
214 it = parameters.find("count");
215 //user has given a template file
216 if(it != parameters.end()){
217 path = m->hasPath(it->second);
218 //if the user has not given a path then, add inputdir. else leave path alone.
219 if (path == "") { parameters["count"] = inputDir + it->second; }
// Resolve each input file. The same three-way handling is applied to every file below:
// "not open" clears the name and aborts, "not found" only clears the name, and any other
// value is recorded through the matching m->set...File() call.
224 //check for required parameters
225 accnosfile = validParameter.validFile(parameters, "accnos", true);
226 if (accnosfile == "not open") { accnosfile = ""; abort = true; }
227 else if (accnosfile == "not found") { accnosfile = ""; }
228 else { m->setAccnosFile(accnosfile); }
230 fastafile = validParameter.validFile(parameters, "fasta", true);
231 if (fastafile == "not open") { fastafile = ""; abort = true; }
232 else if (fastafile == "not found") { fastafile = ""; }
233 else { m->setFastaFile(fastafile); }
235 namefile = validParameter.validFile(parameters, "name", true);
236 if (namefile == "not open") { namefile = ""; abort = true; }
237 else if (namefile == "not found") { namefile = ""; }
238 else { m->setNameFile(namefile); }
240 groupfile = validParameter.validFile(parameters, "group", true);
241 if (groupfile == "not open") { groupfile = ""; abort = true; }
242 else if (groupfile == "not found") { groupfile = ""; }
243 else { m->setGroupFile(groupfile); }
245 listfile = validParameter.validFile(parameters, "list", true);
246 if (listfile == "not open") { listfile = ""; abort = true; }
247 else if (listfile == "not found") { listfile = ""; }
248 else { m->setListFile(listfile); }
250 taxfile = validParameter.validFile(parameters, "taxonomy", true);
251 if (taxfile == "not open") { taxfile = ""; abort = true; }
252 else if (taxfile == "not found") { taxfile = ""; }
253 else { m->setTaxonomyFile(taxfile); }
255 designfile = validParameter.validFile(parameters, "design", true);
256 if (designfile == "not open") { designfile = ""; abort = true; }
257 else if (designfile == "not found") { designfile = ""; }
258 else { m->setDesignFile(designfile); }
// The groups to remove arrive as one dash-separated string; split it into Groups and
// hand the result to m->setGroups.
260 groups = validParameter.validFile(parameters, "groups", false);
261 if (groups == "not found") { groups = ""; }
263 m->splitAtDash(groups, Groups);
264 m->setGroups(Groups);
267 sharedfile = validParameter.validFile(parameters, "shared", true);
268 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
269 else if (sharedfile == "not found") { sharedfile = ""; }
270 else { m->setSharedFile(sharedfile); }
273 countfile = validParameter.validFile(parameters, "count", true);
274 if (countfile == "not open") { countfile = ""; abort = true; }
275 else if (countfile == "not found") { countfile = ""; }
276 else { m->setCountTableFile(countfile); }
// A count file cannot be combined with a name file or with a group file.
278 if ((namefile != "") && (countfile != "")) {
279 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
282 if ((groupfile != "") && (countfile != "")) {
283 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
// None of shared/group/design/count was supplied: fall back to mothur's current files.
// NOTE(review): the else-branches that chain these fallbacks are not visible in this
// listing; the inline comments state the intended priority order -- confirm the nesting.
287 if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) {
288 //is there are current file available for any of these?
// Sequence-level inputs were given, so a design file is not among the candidates here.
289 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
290 //give priority to group, then shared
291 groupfile = m->getGroupFile();
292 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
294 sharedfile = m->getSharedFile();
295 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
297 countfile = m->getCountTableFile();
298 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
300 m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
// No sequence-level inputs either: the design file is also a candidate.
305 //give priority to shared, then group
306 sharedfile = m->getSharedFile();
307 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
309 groupfile = m->getGroupFile();
310 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
312 designfile = m->getDesignFile();
313 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
315 countfile = m->getCountTableFile();
316 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
318 m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
// Final sanity checks: the groups to remove must come from an accnos file or the groups
// parameter, at least one file to filter must exist, and sequence-level files require a
// group or count file.
327 if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file containing group names or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
329 if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
330 if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
// Without a count or name file, but with a fasta or taxonomy file, pass those file names
// to parser.getNameFile.
332 if (countfile == "") {
333 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
334 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
335 parser.getNameFile(files);
// Exception path: report the error tagged with this class and function name.
341 catch(exception& e) {
342 m->errorOut(e, "RemoveGroupsCommand", "RemoveGroupsCommand");
346 //**********************************************************************************************************************
// Runs the command: determines which groups to remove, builds the set of sequence names
// to drop (`names`), filters every supplied input file through the matching read*()
// method, lists the files written, and records them as mothur's new current files.
// Visible return values: 0 after help/citation, on control_pressed and when the count
// file has no group info; 2 when the command was aborted for any other reason.
// NOTE(review): several lines (declarations of `ct`, `util`, `itTypes`, `current`, closing
// braces) are not visible in this listing.
348 int RemoveGroupsCommand::execute(){
351 if (abort == true) { if (calledHelp) { return 0; } return 2; }
353 //get groups you want to remove
354 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
// Group-file route: load the group map and let SharedUtil::setGroups check Groups against
// the group names it contains.
356 if (groupfile != "") {
357 groupMap = new GroupMap(groupfile);
360 //make sure groups are valid
361 //takes care of user setting groupNames that are invalid or setting groups=all
362 SharedUtil* util = new SharedUtil();
363 vector<string> namesGroups = groupMap->getNamesOfGroups();
364 util->setGroups(Groups, namesGroups);
367 //fill names with names of sequences that are from the groups we want to remove
// Count-file route: the table itself supplies the group names and sequence names.
371 }else if (countfile != ""){
372 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
373 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
376 ct.readTable(countfile);
// A count table without group columns cannot be filtered by group.
377 if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
379 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
381 util.setGroups(Groups, gNamesOfGroups);
382 vector<string> namesOfSeqs = ct.getNamesOfSeqs();
383 sort(Groups.begin(), Groups.end());
// Mark a sequence for removal when m->isSubset(Groups, <its groups>) holds.
385 for (int i = 0; i < namesOfSeqs.size(); i++) {
386 vector<string> thisSeqsGroups = ct.getGroups(namesOfSeqs[i]);
387 if (m->isSubset(Groups, thisSeqsGroups)) { //you only have seqs from these groups so remove you
388 names.insert(namesOfSeqs[i]);
394 if (m->control_pressed) { return 0; }
396 //read through the correct file and output lines you want to keep
// The name file is handled first; readName() fills uniqueToRedundant, which readFasta()
// and readList() consult.
397 if (namefile != "") { readName(); }
398 if (fastafile != "") { readFasta(); }
399 if (groupfile != "") { readGroup(); }
400 if (countfile != "") { readCount(); }
401 if (listfile != "") { readList(); }
402 if (taxfile != "") { readTax(); }
403 if (sharedfile != "") { readShared(); }
404 if (designfile != "") { readDesign(); }
// Interrupted: delete everything written so far.
406 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// Report the output files.
408 if (outputNames.size() != 0) {
409 m->mothurOutEndLine();
410 m->mothurOut("Output File names: "); m->mothurOutEndLine();
411 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
412 m->mothurOutEndLine();
// For each output type, the first file produced (if any) becomes mothur's current file
// of that type.
414 //set fasta file as new current fastafile
416 itTypes = outputTypes.find("fasta");
417 if (itTypes != outputTypes.end()) {
418 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
421 itTypes = outputTypes.find("name");
422 if (itTypes != outputTypes.end()) {
423 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
426 itTypes = outputTypes.find("group");
427 if (itTypes != outputTypes.end()) {
428 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
431 itTypes = outputTypes.find("list");
432 if (itTypes != outputTypes.end()) {
433 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
436 itTypes = outputTypes.find("taxonomy");
437 if (itTypes != outputTypes.end()) {
438 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
441 itTypes = outputTypes.find("shared");
442 if (itTypes != outputTypes.end()) {
443 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
446 itTypes = outputTypes.find("design");
447 if (itTypes != outputTypes.end()) {
448 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
451 itTypes = outputTypes.find("count");
452 if (itTypes != outputTypes.end()) {
453 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
// Exception path: report the error tagged with this class and function name.
460 catch(exception& e) {
461 m->errorOut(e, "RemoveGroupsCommand", "execute");
466 //**********************************************************************************************************************
// Filters the fasta file: sequences whose name is not in `names` are copied to the
// output file; the others are either written under a replacement name taken from
// uniqueToRedundant or counted as removed.
// NOTE(review): the stream declarations, the read loop header and the branch that
// introduces the uniqueToRedundant lookup are not visible in this listing.
467 int RemoveGroupsCommand::readFasta(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
469 string thisOutputDir = outputDir;
470 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
471 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + getOutputFileNameTag("fasta", fastafile);
474 m->openOutputFile(outputFileName, out);
477 m->openInputFile(fastafile, in);
480 bool wroteSomething = false;
481 int removedCount = 0;
// Interrupted: close both streams, delete the partial output and stop.
484 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
486 Sequence currSeq(in);
487 name = currSeq.getName();
// Names absent from `names` are kept unchanged.
490 //if this name is in the accnos file
491 if (names.count(name) == 0) {
492 wroteSomething = true;
493 currSeq.printSequence(out);
// A hit in uniqueToRedundant means the sequence is written under the mapped name;
// a miss counts as one removed sequence.
495 //if you are not in the accnos file check if you are a name that needs to be changed
496 map<string, string>::iterator it = uniqueToRedundant.find(name);
497 if (it != uniqueToRedundant.end()) {
498 wroteSomething = true;
499 currSeq.setName(it->second);
500 currSeq.printSequence(out);
501 }else { removedCount++; }
// The output file is registered even when nothing was written to it.
509 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
510 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
512 m->mothurOut("Removed " + toString(removedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
517 catch(exception& e) {
518 m->errorOut(e, "RemoveGroupsCommand", "readFasta");
522 //**********************************************************************************************************************
// Filters the shared file in two passes. The first pass reads the file once to learn which
// groups it contains; the second pass re-reads it with m's group list set to the groups
// that should be kept and writes one output file per label.
// m's original group list is saved in mothurOutGroups and restored before returning on
// the visible normal paths.
// NOTE(review): some lines (e.g. the `out` stream declaration, closing braces, the final
// return) are not visible in this listing.
523 int RemoveGroupsCommand::readShared(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
525 string thisOutputDir = outputDir;
526 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
528 //get group names from sharedfile so we can set Groups to the groupNames we want to keep
529 //that way we can take advantage of the reads in inputdata and sharedRabundVector
530 InputData* tempInput = new InputData(sharedfile, "sharedfile");
531 vector<SharedRAbundVector*> lookup = tempInput->getSharedRAbundVectors();
// Remember the current group selection so it can be restored later.
534 vector<string> allGroupsNames = m->getAllGroups();
535 vector<string> mothurOutGroups = m->getGroups();
// Keep every group of the file that is not in the user's removal list.
537 vector<string> groupsToKeep;
538 for (int i = 0; i < allGroupsNames.size(); i++) {
539 if (!m->inUsersGroups(allGroupsNames[i], m->getGroups())) {
540 groupsToKeep.push_back(allGroupsNames[i]);
// Nothing to remove: restore the group selection, free the first-pass data and stop.
544 if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove."); m->mothurOutEndLine(); m->setGroups(mothurOutGroups); delete tempInput; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
// Free the first-pass vectors before re-reading.
547 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
549 m->setGroups(groupsToKeep);
// Reset the label/header state held on m before the second read.
551 m->saveNextLabel = "";
552 m->printedHeaders = false;
553 m->currentBinLabels.clear();
554 m->binLabelsInFile.clear();
556 InputData input(sharedfile, "sharedfile");
557 lookup = input.getSharedRAbundVectors();
559 bool wroteSomething = false;
// One iteration per label; a NULL first element ends the loop.
561 while(lookup[0] != NULL) {
// The label is part of the output file name, so each label gets its own file.
563 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("shared", sharedfile);
565 m->openOutputFile(outputFileName, out);
566 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
// Interrupted: close and delete the partial output, free the vectors and stop.
568 if (m->control_pressed) { out.close(); m->mothurRemove(outputFileName); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
570 lookup[0]->printHeaders(out);
// Write label, group and abundance data for every vector of this label.
572 for (int i = 0; i < lookup.size(); i++) {
573 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
574 lookup[i]->print(out);
575 wroteSomething = true;
579 //get next line to process
580 //prevent memory leak
581 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
582 lookup = input.getSharedRAbundVectors();
// Restore the group selection that was active on entry.
588 m->setGroups(mothurOutGroups);
590 if (wroteSomething == false) { m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine(); }
// Build a comma-separated summary of the removed groups.
// NOTE(review): Groups.size()-1 is computed on an unsigned size, so this relies on Groups
// being non-empty here -- confirm that callers guarantee it.
592 string groupsString = "";
593 for (int i = 0; i < Groups.size()-1; i++) { groupsString += Groups[i] + ", "; }
594 groupsString += Groups[Groups.size()-1];
596 m->mothurOut("Removed groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
601 catch(exception& e) {
602 m->errorOut(e, "RemoveGroupsCommand", "readShared");
606 //**********************************************************************************************************************
// Filters the list file: each bin's comma-separated names are rebuilt without the names
// held in `names`; a name that has an entry in uniqueToRedundant is replaced by the mapped
// name, and bins that end up empty are dropped.
// NOTE(review): the stream/ListVector declarations, the outer read loop, the printing of
// the new list and some branch lines are not visible in this listing.
607 int RemoveGroupsCommand::readList(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
609 string thisOutputDir = outputDir;
610 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
611 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("list", listfile);
614 m->openOutputFile(outputFileName, out);
617 m->openInputFile(listfile, in);
619 bool wroteSomething = false;
620 int removedCount = 0;
626 //read in list vector
629 //make a new list vector
// The filtered list carries the same label as the list that was read.
631 newList.setLabel(list.getLabel());
634 for (int i = 0; i < list.getNumBins(); i++) {
// Interrupted: close both streams, delete the partial output and stop.
635 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
637 //parse out names that are in accnos file
638 string binnames = list.get(i);
640 string newNames = "";
// Peel names off the front of the bin one comma at a time. find_first_of yields
// string::npos when no comma is left; the comparison with -1 relies on -1 converting
// to that value.
641 while (binnames.find_first_of(',') != -1) {
642 string name = binnames.substr(0,binnames.find_first_of(','));
643 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
// Names absent from `names` are kept (each followed by a comma).
645 //if that name is in the .accnos file, add it
646 if (names.count(name) == 0) { newNames += name + ","; }
// A hit in uniqueToRedundant adds the mapped name; a miss counts as one removal.
648 //if you are not in the accnos file check if you are a name that needs to be changed
649 map<string, string>::iterator it = uniqueToRedundant.find(name);
650 if (it != uniqueToRedundant.end()) {
651 newNames += it->second + ",";
652 }else { removedCount++; }
// What remains in binnames is the last name of the bin; it gets the same treatment.
657 if (names.count(binnames) == 0) { newNames += binnames + ","; }
658 else { //if you are not in the accnos file check if you are a name that needs to be changed
659 map<string, string>::iterator it = uniqueToRedundant.find(binnames);
660 if (it != uniqueToRedundant.end()) {
661 newNames += it->second + ",";
662 }else { removedCount++; }
665 //if there are names in this bin add to new list
666 if (newNames != "") {
667 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
668 newList.push_back(newNames);
672 //print new listvector
673 if (newList.getNumBins() != 0) {
674 wroteSomething = true;
// The output file is registered even when nothing was written to it.
683 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
684 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
686 m->mothurOut("Removed " + toString(removedCount) + " sequences from your list file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
691 catch(exception& e) {
692 m->errorOut(e, "RemoveGroupsCommand", "readList");
696 //**********************************************************************************************************************
// Filters the name file. For every row, the names of the second column that are not in
// `names` are kept. If the first-column name survives, the row is written with it; if it
// does not but other names do, the first surviving name is promoted to the first column
// and the replacement is recorded in uniqueToRedundant.
// NOTE(review): the stream declarations, the read loop header, the read of `secondCol`
// and the else-branch line are not visible in this listing.
697 int RemoveGroupsCommand::readName(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
699 string thisOutputDir = outputDir;
700 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
701 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + getOutputFileNameTag("name", namefile);
703 m->openOutputFile(outputFileName, out);
706 m->openInputFile(namefile, in);
707 string name, firstCol, secondCol;
709 bool wroteSomething = false;
710 int removedCount = 0;
// Interrupted: close both streams, delete the partial output and stop.
713 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
715 in >> firstCol; m->gobble(in);
// Split the second column at commas and keep the names that are not marked for removal.
718 vector<string> parsedNames;
719 m->splitAtComma(secondCol, parsedNames);
721 vector<string> validSecond; validSecond.clear();
722 for (int i = 0; i < parsedNames.size(); i++) {
723 if (names.count(parsedNames[i]) == 0) {
724 validSecond.push_back(parsedNames[i]);
// Every name dropped from the second column counts as one removed sequence.
728 removedCount += parsedNames.size()-validSecond.size();
730 //if the name in the first column is in the set then print it and any other names in second column also in set
731 if (names.count(firstCol) == 0) {
733 wroteSomething = true;
735 out << firstCol << '\t';
// Names are comma-separated; the last one is written without a trailing comma.
737 //you know you have at least one valid second since first column is valid
738 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
739 out << validSecond[validSecond.size()-1] << endl;
741 //make first name in set you come to first column and then add the remaining names to second column
744 //you want part of this row
745 if (validSecond.size() != 0) {
747 wroteSomething = true;
// The first surviving name takes over the first column.
749 out << validSecond[0] << '\t';
751 //you know you have at least one valid second since first column is valid
752 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
753 out << validSecond[validSecond.size()-1] << endl;
// Record the replacement so other readers can rename the old first-column name.
754 uniqueToRedundant[firstCol] = validSecond[0];
// The output file is registered even when nothing was written to it.
763 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
764 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
766 m->mothurOut("Removed " + toString(removedCount) + " sequences from your name file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
770 catch(exception& e) {
771 m->errorOut(e, "RemoveGroupsCommand", "readName");
776 //**********************************************************************************************************************
// Filters the group file: each (name, group) row whose name is not in `names` is copied
// to the output file; every other row is counted as removed.
// NOTE(review): the stream/variable declarations and the read loop header are not visible
// in this listing.
777 int RemoveGroupsCommand::readGroup(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
779 string thisOutputDir = outputDir;
780 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
781 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + getOutputFileNameTag("group", groupfile);
783 m->openOutputFile(outputFileName, out);
786 m->openInputFile(groupfile, in);
789 bool wroteSomething = false;
790 int removedCount = 0;
// Interrupted: close both streams, delete the partial output and stop.
793 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
795 in >> name; //read from first column
796 in >> group; //read from second column
// Rows whose name is absent from `names` are kept unchanged.
798 //if this name is in the accnos file
799 if (names.count(name) == 0) {
800 wroteSomething = true;
801 out << name << '\t' << group << endl;
802 }else { removedCount++; }
// The output file is registered even when nothing was written to it.
809 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
810 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
812 m->mothurOut("Removed " + toString(removedCount) + " sequences from your group file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
817 catch(exception& e) {
818 m->errorOut(e, "RemoveGroupsCommand", "readGroup");
822 //**********************************************************************************************************************
// Filters the count table: rows whose name is in `names` are dropped, and for the
// remaining rows only the columns of the groups that are kept are written, together with
// a recomputed total.
// The output header lists the kept groups in sorted order. The kept counts of each row
// are therefore stored under the group's sorted index and written in that order, so the
// data columns line up with the header even when the input table's group columns are not
// sorted.
// NOTE(review): the stream declarations, the read loop header and some closing lines are
// not visible in this listing.
823 int RemoveGroupsCommand::readCount(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
825 string thisOutputDir = outputDir;
826 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
827 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(countfile)) + getOutputFileNameTag("count", countfile);
830 m->openOutputFile(outputFileName, out);
833 m->openInputFile(countfile, in);
835 bool wroteSomething = false;
836 int removedCount = 0;
// The first line holds the column headers; group names start at the third column.
838 string headers = m->getline(in); m->gobble(in);
839 vector<string> columnHeaders = m->splitWhiteSpace(headers);
// originalGroupIndexes: position of a group column in the file -> group name.
// GroupIndexes:         group name -> position in the sorted group list.
// indexOfGroupsChosen:  sorted positions of the groups that are kept.
841 vector<string> groups;
842 map<int, string> originalGroupIndexes;
843 map<string, int> GroupIndexes;
844 set<int> indexOfGroupsChosen;
845 for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; }
846 //sort groups to keep consistent with how we store the groups in groupmap
847 sort(groups.begin(), groups.end());
848 for (int i = 0; i < groups.size(); i++) { GroupIndexes[groups[i]] = i; }
// Every group that is not in the removal list is kept.
850 vector<string> groupsToKeep;
851 for (int i = 0; i < groups.size(); i++) {
852 if (!m->inUsersGroups(groups[i], Groups)) { groupsToKeep.push_back(groups[i]); }
854 sort(groupsToKeep.begin(), groupsToKeep.end());
855 out << "Representative_Sequence\ttotal\t";
856 for (int i = 0; i < groupsToKeep.size(); i++) { out << groupsToKeep[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[groupsToKeep[i]]); }
859 string name; int oldTotal;
// Interrupted: close both streams, delete the partial output and stop.
862 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
864 in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
865 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
867 if (names.count(name) == 0) {
868 //if group info, then read it
// Kept counts are keyed by the group's sorted index so they can be written in header order.
869 map<int, int> selectedCounts; int thisTotal = 0; int temp;
870 for (int i = 0; i < groups.size(); i++) {
871 int thisIndex = GroupIndexes[originalGroupIndexes[i]];
872 in >> temp; m->gobble(in);
873 if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group
874 selectedCounts[thisIndex] = temp; thisTotal += temp;
878 out << name << '\t' << thisTotal << '\t';
// The map iterates in ascending sorted-index order, which is the order of the header.
879 for (map<int, int>::iterator itCount = selectedCounts.begin(); itCount != selectedCounts.end(); itCount++) { out << itCount->second << '\t'; }
882 wroteSomething = true;
// Reads that belonged to removed groups count towards the removed total.
883 removedCount+= (oldTotal - thisTotal);
// A dropped row: skip the rest of its line and count all of its reads as removed.
884 }else { m->getline(in); removedCount += oldTotal; }
// The output file is registered even when nothing was written to it.
891 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
892 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
894 m->mothurOut("Removed " + toString(removedCount) + " sequences from your count file."); m->mothurOutEndLine();
// Exception path: report the error tagged with this class and function name.
898 catch(exception& e) {
899 m->errorOut(e, "RemoveGroupsCommand", "readCount");
903 //**********************************************************************************************************************
904 int RemoveGroupsCommand::readDesign(){
906 string thisOutputDir = outputDir;
907 if (outputDir == "") { thisOutputDir += m->hasPath(designfile); }
908 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(designfile)) + getOutputFileNameTag("design", designfile);
911 m->openOutputFile(outputFileName, out);
914 m->openInputFile(designfile, in);
917 bool wroteSomething = false;
918 int removedCount = 0;
921 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
923 in >> name; //read from first column
924 in >> group; //read from second column
926 //if this name is in the accnos file
927 if (!(m->inUsersGroups(name, Groups))) {
928 wroteSomething = true;
929 out << name << '\t' << group << endl;
930 }else { removedCount++; }
937 if (wroteSomething == false) { m->mothurOut("Your file contains only groups from the groups you wish to remove."); m->mothurOutEndLine(); }
938 outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName);
940 m->mothurOut("Removed " + toString(removedCount) + " groups from your design file."); m->mothurOutEndLine();
945 catch(exception& e) {
946 m->errorOut(e, "RemoveGroupsCommand", "readDesign");
951 //**********************************************************************************************************************
952 int RemoveGroupsCommand::readTax(){
954 string thisOutputDir = outputDir;
955 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
956 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + getOutputFileNameTag("taxonomy", taxfile);
958 m->openOutputFile(outputFileName, out);
961 m->openInputFile(taxfile, in);
964 bool wroteSomething = false;
965 int removedCount = 0;
968 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
970 in >> name; //read from first column
971 in >> tax; //read from second column
973 //if this name is in the accnos file
974 if (names.count(name) == 0) {
975 wroteSomething = true;
976 out << name << '\t' << tax << endl;
977 }else { //if you are not in the accnos file check if you are a name that needs to be changed
978 map<string, string>::iterator it = uniqueToRedundant.find(name);
979 if (it != uniqueToRedundant.end()) {
980 wroteSomething = true;
981 out << it->second << '\t' << tax << endl;
982 }else { removedCount++; } }
989 if (wroteSomething == false) { m->mothurOut("Your file contains only sequences from the groups you wish to remove."); m->mothurOutEndLine(); }
990 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
992 m->mothurOut("Removed " + toString(removedCount) + " sequences from your taxonomy file."); m->mothurOutEndLine();
996 catch(exception& e) {
997 m->errorOut(e, "RemoveGroupsCommand", "readTax");
1001 //**********************************************************************************************************************
1002 int RemoveGroupsCommand::fillNames(){
1004 vector<string> seqs = groupMap->getNamesSeqs();
1006 for (int i = 0; i < seqs.size(); i++) {
1008 if (m->control_pressed) { return 0; }
1010 string group = groupMap->getGroup(seqs[i]);
1012 if (m->inUsersGroups(group, Groups)) {
1013 names.insert(seqs[i]);
1019 catch(exception& e) {
1020 m->errorOut(e, "RemoveGroupsCommand", "fillNames");
1025 //**********************************************************************************************************************