5 * Created by westcott on 11/10/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "getgroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
// Declares every parameter get.groups accepts (the input file types plus accnos, groups,
// inputdir and outputdir), appends each one to `parameters`, and gathers the parameter
// names into myArray (the option constructor stores the result of this call as its list
// of valid parameter names).
17 vector<string> GetGroupsCommand::setParameters(){
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "FNGLT","fasta",false,false, true); parameters.push_back(pfasta);
20 CommandParameter pshared("shared", "InputTypes", "", "", "none", "sharedGroup", "none","shared",false,false, true); parameters.push_back(pshared);
21 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "none", "none","name",false,false, true); parameters.push_back(pname);
22 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false, true); parameters.push_back(pcount);
23 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "sharedGroup", "FNGLT","group",false,false, true); parameters.push_back(pgroup);
24 CommandParameter pdesign("design", "InputTypes", "", "", "none", "sharedGroup", "FNGLT","design",false,false, true); parameters.push_back(pdesign);
25 CommandParameter plist("list", "InputTypes", "", "", "none", "none", "FNGLT","list",false,false, true); parameters.push_back(plist);
26 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "none", "FNGLT","taxonomy",false,false, true); parameters.push_back(ptaxonomy);
27 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(paccnos);
28 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
29 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
30 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
// gather only the names of the parameters registered above
32 vector<string> myArray;
33 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// report any exception together with the class and function name
37 m->errorOut(e, "GetGroupsCommand", "setParameters");
41 //**********************************************************************************************************************
// Builds the help text for get.groups: what the command does, which parameters it
// accepts, which of them are required, and example invocations.
// The text lists the count file type/parameter as well, since the command registers a
// count parameter and the "required" sentence below already refers to it.
42 string GetGroupsCommand::getHelpString(){
44 string helpString = "";
45 helpString += "The get.groups command selects sequences from a specific group or set of groups from the following file types: fasta, name, group, count, list, taxonomy, design or shared file.\n";
46 helpString += "It outputs a file containing the sequences in those specified groups, or a sharedfile containing only those groups.\n";
47 helpString += "The get.groups command parameters are accnos, fasta, name, group, count, list, taxonomy, shared, design and groups. The group or count parameter is required, unless you have a current group or count file, or are using a shared file.\n";
48 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
49 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n";
50 helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
51 helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
52 helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
53 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e. yourFasta).\n";
// report any exception together with the class and function name
57 m->errorOut(e, "GetGroupsCommand", "getHelpString");
61 //**********************************************************************************************************************
// Maps an output type name to the file-name pattern used for that output.
// list and shared outputs carry an extra [tag] component; every other known type uses
// "[filename],pick,[extension]". An unknown type logs an error and sets m->control_pressed.
62 string GetGroupsCommand::getOutputPattern(string type) {
66 if (type == "fasta") { pattern = "[filename],pick,[extension]"; }
67 else if (type == "taxonomy") { pattern = "[filename],pick,[extension]"; }
68 else if (type == "name") { pattern = "[filename],pick,[extension]"; }
69 else if (type == "group") { pattern = "[filename],pick,[extension]"; }
70 else if (type == "count") { pattern = "[filename],pick,[extension]"; }
71 else if (type == "list") { pattern = "[filename],[tag],pick,[extension]"; }
72 else if (type == "shared") { pattern = "[filename],[tag],pick,[extension]"; }
73 else if (type == "design") { pattern = "[filename],pick,[extension]"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
// report any exception together with the class and function name
79 m->errorOut(e, "GetGroupsCommand", "getOutputPattern");
83 //**********************************************************************************************************************
// Default constructor: flags the command as aborted with help called, and registers an
// empty list of output file names for every output type this command can produce.
84 GetGroupsCommand::GetGroupsCommand(){
86 abort = true; calledHelp = true;
// one empty entry per output type
88 vector<string> tempOutNames;
89 outputTypes["fasta"] = tempOutNames;
90 outputTypes["taxonomy"] = tempOutNames;
91 outputTypes["name"] = tempOutNames;
92 outputTypes["group"] = tempOutNames;
93 outputTypes["list"] = tempOutNames;
94 outputTypes["shared"] = tempOutNames;
95 outputTypes["design"] = tempOutNames;
96 outputTypes["count"] = tempOutNames;
// report any exception together with the class and function name
99 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
103 //**********************************************************************************************************************
// Option constructor: parses the option string, validates the parameter names, resolves
// each input file (prepending inputdir when the user gave no path), records the files as
// the current files of their type, and sets `abort` when a file cannot be opened or a
// required combination of inputs is missing.
104 GetGroupsCommand::GetGroupsCommand(string option) {
106 abort = false; calledHelp = false;
108 //allow user to run help
109 if(option == "help") { help(); abort = true; calledHelp = true; }
110 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// names of the parameters this command accepts
113 vector<string> myArray = setParameters();
115 OptionParser parser(option);
116 map<string,string> parameters = parser.getParameters();
118 ValidParameters validParameter;
119 map<string,string>::iterator it;
121 //check to make sure all parameters are valid for command
122 for (it = parameters.begin(); it != parameters.end(); it++) {
123 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
126 //initialize outputTypes
127 vector<string> tempOutNames;
128 outputTypes["fasta"] = tempOutNames;
129 outputTypes["taxonomy"] = tempOutNames;
130 outputTypes["name"] = tempOutNames;
131 outputTypes["group"] = tempOutNames;
132 outputTypes["list"] = tempOutNames;
133 outputTypes["shared"] = tempOutNames;
134 outputTypes["design"] = tempOutNames;
135 outputTypes["count"] = tempOutNames;
138 //if the user changes the output directory command factory will send this info to us in the output parameter
139 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
// for each file parameter below: if it was supplied without a path, prefix it with inputDir
146 it = parameters.find("fasta");
147 //user has given a fasta file
148 if(it != parameters.end()){
149 path = m->hasPath(it->second);
150 //if the user has not given a path then, add inputdir. else leave path alone.
151 if (path == "") { parameters["fasta"] = inputDir + it->second; }
154 it = parameters.find("accnos");
155 //user has given an accnos file
156 if(it != parameters.end()){
157 path = m->hasPath(it->second);
158 //if the user has not given a path then, add inputdir. else leave path alone.
159 if (path == "") { parameters["accnos"] = inputDir + it->second; }
162 it = parameters.find("list");
163 //user has given a list file
164 if(it != parameters.end()){
165 path = m->hasPath(it->second);
166 //if the user has not given a path then, add inputdir. else leave path alone.
167 if (path == "") { parameters["list"] = inputDir + it->second; }
170 it = parameters.find("name");
171 //user has given a name file
172 if(it != parameters.end()){
173 path = m->hasPath(it->second);
174 //if the user has not given a path then, add inputdir. else leave path alone.
175 if (path == "") { parameters["name"] = inputDir + it->second; }
178 it = parameters.find("group");
179 //user has given a group file
180 if(it != parameters.end()){
181 path = m->hasPath(it->second);
182 //if the user has not given a path then, add inputdir. else leave path alone.
183 if (path == "") { parameters["group"] = inputDir + it->second; }
186 it = parameters.find("taxonomy");
187 //user has given a taxonomy file
188 if(it != parameters.end()){
189 path = m->hasPath(it->second);
190 //if the user has not given a path then, add inputdir. else leave path alone.
191 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
194 it = parameters.find("shared");
195 //user has given a shared file
196 if(it != parameters.end()){
197 path = m->hasPath(it->second);
198 //if the user has not given a path then, add inputdir. else leave path alone.
199 if (path == "") { parameters["shared"] = inputDir + it->second; }
202 it = parameters.find("design");
203 //user has given a design file
204 if(it != parameters.end()){
205 path = m->hasPath(it->second);
206 //if the user has not given a path then, add inputdir. else leave path alone.
207 if (path == "") { parameters["design"] = inputDir + it->second; }
210 it = parameters.find("count");
211 //user has given a count file
212 if(it != parameters.end()){
213 path = m->hasPath(it->second);
214 //if the user has not given a path then, add inputdir. else leave path alone.
215 if (path == "") { parameters["count"] = inputDir + it->second; }
220 //check for required parameters
// for each file: "not open" aborts, "not found" means the parameter was not given,
// anything else is recorded as the current file of that type
221 accnosfile = validParameter.validFile(parameters, "accnos", true);
222 if (accnosfile == "not open") { abort = true; }
223 else if (accnosfile == "not found") { accnosfile = ""; }
224 else { m->setAccnosFile(accnosfile); }
226 fastafile = validParameter.validFile(parameters, "fasta", true);
227 if (fastafile == "not open") { fastafile = ""; abort = true; }
228 else if (fastafile == "not found") { fastafile = ""; }
229 else { m->setFastaFile(fastafile); }
231 namefile = validParameter.validFile(parameters, "name", true);
232 if (namefile == "not open") { namefile = ""; abort = true; }
233 else if (namefile == "not found") { namefile = ""; }
234 else { m->setNameFile(namefile); }
236 listfile = validParameter.validFile(parameters, "list", true);
237 if (listfile == "not open") { abort = true; }
238 else if (listfile == "not found") { listfile = ""; }
239 else { m->setListFile(listfile); }
241 taxfile = validParameter.validFile(parameters, "taxonomy", true);
242 if (taxfile == "not open") { taxfile = ""; abort = true; }
243 else if (taxfile == "not found") { taxfile = ""; }
244 else { m->setTaxonomyFile(taxfile); }
// groups is a dash-separated list of group names; split it into Groups
246 groups = validParameter.validFile(parameters, "groups", false);
247 if (groups == "not found") { groups = ""; }
249 m->splitAtDash(groups, Groups);
250 m->setGroups(Groups);
253 sharedfile = validParameter.validFile(parameters, "shared", true);
254 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
255 else if (sharedfile == "not found") { sharedfile = ""; }
256 else { m->setSharedFile(sharedfile); }
258 groupfile = validParameter.validFile(parameters, "group", true);
259 if (groupfile == "not open") { groupfile = ""; abort = true; }
260 else if (groupfile == "not found") { groupfile = ""; }
261 else { m->setGroupFile(groupfile); }
263 designfile = validParameter.validFile(parameters, "design", true);
264 if (designfile == "not open") { designfile = ""; abort = true; }
265 else if (designfile == "not found") { designfile = ""; }
266 else { m->setDesignFile(designfile); }
268 countfile = validParameter.validFile(parameters, "count", true);
269 if (countfile == "not open") { countfile = ""; abort = true; }
270 else if (countfile == "not found") { countfile = ""; }
271 else { m->setCountTableFile(countfile); }
// name/count and group/count are mutually exclusive inputs
273 if ((namefile != "") && (countfile != "")) {
274 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
277 if ((groupfile != "") && (countfile != "")) {
278 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
// none of shared/group/design/count was given: fall back to the current files
282 if ((sharedfile == "") && (groupfile == "") && (designfile == "") && (countfile == "")) {
283 //is there a current file available for any of these?
284 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
285 //give priority to group, then shared
286 groupfile = m->getGroupFile();
287 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
289 sharedfile = m->getSharedFile();
290 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
292 countfile = m->getCountTableFile();
293 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
295 m->mothurOut("You have no current groupfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
300 //give priority to shared, then group
301 sharedfile = m->getSharedFile();
302 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
304 groupfile = m->getGroupFile();
305 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
307 designfile = m->getDesignFile();
308 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
310 countfile = m->getCountTableFile();
311 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
313 m->mothurOut("You have no current groupfile, designfile, countfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
// the groups to select must come from an accnos file or from the groups parameter
322 if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
324 if ((fastafile == "") && (namefile == "") && (countfile == "") && (groupfile == "") && (designfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared, design, count or list."); m->mothurOutEndLine(); abort = true; }
325 if (((groupfile == "") && (countfile == "")) && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group or count file."); m->mothurOutEndLine(); abort = true; }
// no count and no name file but a fasta or taxonomy file: hand those files to
// parser.getNameFile (presumably to locate a matching current name file - TODO confirm)
327 if (countfile == "") {
328 if ((namefile == "") && ((fastafile != "") || (taxfile != ""))){
329 vector<string> files; files.push_back(fastafile); files.push_back(taxfile);
330 parser.getNameFile(files);
336 catch(exception& e) {
337 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
341 //**********************************************************************************************************************
// Runs the command: determines the groups to select, fills `names` with the sequences that
// belong to those groups (from the group file or the count table), runs the reader for
// every input file that was supplied, prints the output file names, and records the first
// output of each type as the new current file of that type.
343 int GetGroupsCommand::execute(){
// abort was flagged by the constructor: return 0 if help/citation was requested, 2 otherwise
346 if (abort == true) { if (calledHelp) { return 0; } return 2; }
348 //read the groups to select from the accnos file
349 if (accnosfile != "") { m->readAccnos(accnosfile, Groups); m->setGroups(Groups); }
351 if (groupfile != "") {
352 groupMap = new GroupMap(groupfile);
355 //make sure groups are valid
356 //takes care of user setting groupNames that are invalid or setting groups=all
357 SharedUtil* util = new SharedUtil();
358 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
359 util->setGroups(Groups, gNamesOfGroups);
360 m->setGroups(Groups);
361 groupMap->setNamesOfGroups(gNamesOfGroups);
364 //fill names with names of sequences that are from the groups we want to select
368 }else if (countfile != ""){
369 if ((fastafile != "") || (listfile != "") || (taxfile != "")) {
370 m->mothurOut("\n[NOTE]: The count file should contain only unique names, so mothur assumes your fasta, list and taxonomy files also contain only uniques.\n\n");
// the count table must carry per-group columns, otherwise there is nothing to select by
373 ct.readTable(countfile, true, false);
374 if (!ct.hasGroupInfo()) { m->mothurOut("[ERROR]: your count file does not contain group info, aborting.\n"); return 0; }
376 vector<string> gNamesOfGroups = ct.getNamesOfGroups();
378 util.setGroups(Groups, gNamesOfGroups);
379 m->setGroups(Groups);
// collect the sequence names of every selected group
380 for (int i = 0; i < Groups.size(); i++) {
381 vector<string> thisGroupsSeqs = ct.getNamesOfSeqs(Groups[i]);
382 for (int j = 0; j < thisGroupsSeqs.size(); j++) { names.insert(thisGroupsSeqs[j]); }
386 if (m->control_pressed) { return 0; }
388 //read through the correct file and output lines you want to keep
// readName runs first; it fills uniqueToRedundant, which readFasta, readList and readTax consult
389 if (namefile != "") { readName(); }
390 if (fastafile != "") { readFasta(); }
391 if (groupfile != "") { readGroup(); }
392 if (countfile != "") { readCount(); }
393 if (listfile != "") { readList(); }
394 if (taxfile != "") { readTax(); }
395 if (sharedfile != "") { readShared(); }
396 if (designfile != "") { readDesign(); }
// on interruption, remove every output file written so far
398 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
401 if (outputNames.size() != 0) {
402 m->mothurOutEndLine();
403 m->mothurOut("Output File names: "); m->mothurOutEndLine();
404 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
405 m->mothurOutEndLine();
407 //set fasta file as new current fastafile
// the same is done for each output type below: the first output file becomes the current file
409 itTypes = outputTypes.find("fasta");
410 if (itTypes != outputTypes.end()) {
411 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
414 itTypes = outputTypes.find("name");
415 if (itTypes != outputTypes.end()) {
416 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
419 itTypes = outputTypes.find("group");
420 if (itTypes != outputTypes.end()) {
421 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
424 itTypes = outputTypes.find("list");
425 if (itTypes != outputTypes.end()) {
426 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
429 itTypes = outputTypes.find("taxonomy");
430 if (itTypes != outputTypes.end()) {
431 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
434 itTypes = outputTypes.find("shared");
435 if (itTypes != outputTypes.end()) {
436 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
439 itTypes = outputTypes.find("design");
440 if (itTypes != outputTypes.end()) {
441 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setDesignFile(current); }
444 itTypes = outputTypes.find("count");
445 if (itTypes != outputTypes.end()) {
446 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
453 catch(exception& e) {
454 m->errorOut(e, "GetGroupsCommand", "execute");
459 //**********************************************************************************************************************
// Copies the sequences of the selected groups from fastafile into a ".pick" fasta file.
// A sequence is written when its name is in `names`; a name found in uniqueToRedundant is
// written under its replacement name instead.
460 int GetGroupsCommand::readFasta(){
// output goes to outputDir, or next to the input file when no outputDir was set
462 string thisOutputDir = outputDir;
463 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
464 map<string, string> variables;
465 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(fastafile));
466 variables["[extension]"] = m->getExtension(fastafile);
467 string outputFileName = getOutputFileName("fasta", variables);
470 m->openOutputFile(outputFileName, out);
473 m->openInputFile(fastafile, in);
476 bool wroteSomething = false;
477 int selectedCount = 0;
// on interruption, close both files and remove the partial output
480 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
482 Sequence currSeq(in);
483 name = currSeq.getName();
486 //if this sequence belongs to one of the selected groups
487 if (names.count(name) != 0) {
488 wroteSomething = true;
490 currSeq.printSequence(out);
493 //if the name is not among the selected sequences, check if it is a name that needs to be changed
// uniqueToRedundant maps a name-file representative that was not selected to the selected
// name that replaced it (filled in readName)
494 map<string, string>::iterator it = uniqueToRedundant.find(name);
495 if (it != uniqueToRedundant.end()) {
496 wroteSomething = true;
497 currSeq.setName(it->second);
498 currSeq.printSequence(out);
508 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
509 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
511 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
517 catch(exception& e) {
518 m->errorOut(e, "GetGroupsCommand", "readFasta");
522 //**********************************************************************************************************************
// Writes one ".pick" shared file per label read from sharedfile, then reports the selected
// groups. No per-group filtering is done here; the vectors returned by InputData are
// presumably already limited to the groups set through m->setGroups - TODO confirm.
523 int GetGroupsCommand::readShared(){
// output goes to outputDir, or next to the input file when no outputDir was set
525 string thisOutputDir = outputDir;
526 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
528 InputData input(sharedfile, "sharedfile");
529 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
530 map<string, string> variables;
531 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile));
532 variables["[extension]"] = m->getExtension(sharedfile);
534 bool wroteSomething = false;
// one pass per label; a NULL first entry marks the end of the input
536 while(lookup[0] != NULL) {
// the label becomes the [tag] part of the output file name
538 variables["[tag]"] = lookup[0]->getLabel();
539 string outputFileName = getOutputFileName("shared", variables);
542 m->openOutputFile(outputFileName, out);
543 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
// on interruption, remove the partial output and free the vectors still held
545 if (m->control_pressed) { out.close(); m->mothurRemove(outputFileName); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
547 lookup[0]->printHeaders(out);
549 for (int i = 0; i < lookup.size(); i++) {
550 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
551 lookup[i]->print(out);
552 wroteSomething = true;
556 //get next line to process
557 //prevent memory leak
558 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
559 lookup = input.getSharedRAbundVectors();
// nothing was written: word the message like the other readers of this command do
564 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain groups from the groups you wish to get."); m->mothurOutEndLine(); }
566 string groupsString = "";
// join the group names with ", " without evaluating Groups.size()-1, which wraps around
// (size() is unsigned) and indexes out of range when Groups is empty
567 for (int i = 0; i < Groups.size(); i++) { if (i != 0) { groupsString += ", "; } groupsString += Groups[i]; }
570 m->mothurOut("Selected groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
575 catch(exception& e) {
576 m->errorOut(e, "GetGroupsCommand", "readShared");
580 //**********************************************************************************************************************
// Filters listfile down to the selected sequences: for each list vector a ".pick" list
// file tagged with the label is opened, every bin keeps only the names found in `names`
// (or their replacement from uniqueToRedundant), and bins left empty are dropped.
581 int GetGroupsCommand::readList(){
// output goes to outputDir, or next to the input file when no outputDir was set
583 string thisOutputDir = outputDir;
584 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
585 map<string, string> variables;
586 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(listfile));
587 variables["[extension]"] = m->getExtension(listfile);
590 m->openInputFile(listfile, in);
592 bool wroteSomething = false;
593 int selectedCount = 0;
599 //read in list vector
// the label becomes the [tag] part of the output file name
602 variables["[tag]"] = list.getLabel();
603 string outputFileName = getOutputFileName("list", variables);
606 m->openOutputFile(outputFileName, out);
607 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
// bin labels of the input; only the labels of bins that survive are carried over
609 vector<string> binLabels = list.getLabels();
610 vector<string> newBinLabels;
612 //make a new list vector
614 newList.setLabel(list.getLabel());
617 for (int i = 0; i < list.getNumBins(); i++) {
618 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
620 //parse out the names that belong to the selected groups
621 string binnames = list.get(i);
622 vector<string> thisBinNames;
623 m->splitAtComma(binnames, thisBinNames);
625 string newNames = "";
626 for (int j = 0; j < thisBinNames.size(); j++) {
627 string name = thisBinNames[j];
629 //if that name is one of the selected sequences, add it
630 if (names.count(name) != 0) { newNames += name + ","; selectedCount++; }
632 //if the name is not among the selected sequences, check if it is a name that needs to be changed
633 map<string, string>::iterator it = uniqueToRedundant.find(name);
634 if (it != uniqueToRedundant.end()) {
635 newNames += it->second + ",";
641 //if there are names in this bin add to new list
642 if (newNames != "") {
643 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
644 newList.push_back(newNames);
645 newBinLabels.push_back(binLabels[i]);
649 //print new listvector
650 if (newList.getNumBins() != 0) {
651 wroteSomething = true;
652 newList.setLabels(newBinLabels);
653 newList.printHeaders(out);
662 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
664 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
669 catch(exception& e) {
670 m->errorOut(e, "GetGroupsCommand", "readList");
674 //**********************************************************************************************************************
// Filters namefile down to the selected sequences and writes a ".pick" name file.
// Each row keeps only the second-column names found in `names`. When the first-column
// name itself is not selected but some second-column names are, the first surviving name
// becomes the new first column and the mapping old -> new is stored in uniqueToRedundant.
675 int GetGroupsCommand::readName(){
// output goes to outputDir, or next to the input file when no outputDir was set
677 string thisOutputDir = outputDir;
678 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
679 map<string, string> variables;
680 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(namefile));
681 variables["[extension]"] = m->getExtension(namefile);
682 string outputFileName = getOutputFileName("name", variables);
685 m->openOutputFile(outputFileName, out);
688 m->openInputFile(namefile, in);
689 string name, firstCol, secondCol;
691 bool wroteSomething = false;
692 int selectedCount = 0;
// on interruption, close both files and remove the partial output
695 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
697 in >> firstCol; m->gobble(in);
// second column is a comma-separated list of names
700 vector<string> parsedNames;
701 m->splitAtComma(secondCol, parsedNames);
// keep only the second-column names that belong to the selected groups
703 vector<string> validSecond; validSecond.clear();
704 for (int i = 0; i < parsedNames.size(); i++) {
705 if (names.count(parsedNames[i]) != 0) {
706 validSecond.push_back(parsedNames[i]);
710 selectedCount += validSecond.size();
712 //if the name in the first column is in the set then print it and any other names in second column also in set
713 if (names.count(firstCol) != 0) {
715 wroteSomething = true;
717 out << firstCol << '\t';
719 //you know you have at least one valid second since first column is valid
// NOTE(review): this relies on the first-column name also appearing in the second column;
// if it does not, validSecond can be empty and validSecond.size()-1 wraps around - confirm
720 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
721 out << validSecond[validSecond.size()-1] << endl;
723 //make first name in set you come to first column and then add the remaining names to second column
726 //you want part of this row
727 if (validSecond.size() != 0) {
729 wroteSomething = true;
731 out << validSecond[0] << '\t';
733 //validSecond is known to be non-empty here because of the size check above
734 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
735 out << validSecond[validSecond.size()-1] << endl;
// remember the renaming so the fasta, list and taxonomy readers can apply it
736 uniqueToRedundant[firstCol] = validSecond[0];
745 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
746 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
748 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
752 catch(exception& e) {
753 m->errorOut(e, "GetGroupsCommand", "readName");
758 //**********************************************************************************************************************
// Copies the rows of groupfile whose sequence name is in `names` into a ".pick" group file.
759 int GetGroupsCommand::readGroup(){
// output goes to outputDir, or next to the input file when no outputDir was set
761 string thisOutputDir = outputDir;
762 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
763 map<string, string> variables;
764 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(groupfile));
765 variables["[extension]"] = m->getExtension(groupfile);
766 string outputFileName = getOutputFileName("group", variables);
769 m->openOutputFile(outputFileName, out);
772 m->openInputFile(groupfile, in);
775 bool wroteSomething = false;
776 int selectedCount = 0;
// on interruption, close both files and remove the partial output
779 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
781 in >> name; //read from first column
782 in >> group; //read from second column
784 //if this sequence belongs to one of the selected groups
785 if (names.count(name) != 0) {
786 wroteSomething = true;
787 out << name << '\t' << group << endl;
796 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
797 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
799 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
803 catch(exception& e) {
804 m->errorOut(e, "GetGroupsCommand", "readGroup");
808 //**********************************************************************************************************************
// Filters countfile down to the selected groups and sequences and writes a ".pick" count
// table: the header lists only the chosen groups, and each kept row carries the counts of
// those groups plus their sum as the new total.
809 int GetGroupsCommand::readCount(){
// output goes to outputDir, or next to the input file when no outputDir was set
811 string thisOutputDir = outputDir;
812 if (outputDir == "") { thisOutputDir += m->hasPath(countfile); }
813 map<string, string> variables;
814 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(countfile));
815 variables["[extension]"] = m->getExtension(countfile);
816 string outputFileName = getOutputFileName("count", variables);
819 m->openOutputFile(outputFileName, out);
822 m->openInputFile(countfile, in);
824 bool wroteSomething = false;
825 int selectedCount = 0;
// first line holds the column headers; group columns start at index 2
827 string headers = m->getline(in); m->gobble(in);
828 vector<string> columnHeaders = m->splitWhiteSpace(headers);
830 vector<string> groups;
// originalGroupIndexes: column position in the file -> group name
831 map<int, string> originalGroupIndexes;
// GroupIndexes: group name -> position in the sorted group list
832 map<string, int> GroupIndexes;
// sorted positions of the groups the user selected
833 set<int> indexOfGroupsChosen;
834 for (int i = 2; i < columnHeaders.size(); i++) { groups.push_back(columnHeaders[i]); originalGroupIndexes[i-2] = columnHeaders[i]; }
835 //sort groups to keep consistent with how we store the groups in groupmap
836 sort(groups.begin(), groups.end());
837 for (int i = 0; i < groups.size(); i++) { GroupIndexes[groups[i]] = i; }
838 sort(Groups.begin(), Groups.end());
839 out << "Representative_Sequence\ttotal\t";
// NOTE(review): the header lists Groups in sorted order, while the counts further down are
// appended in the file's original column order - these line up only if the input columns
// are already sorted; confirm
840 for (int i = 0; i < Groups.size(); i++) { out << Groups[i] << '\t'; indexOfGroupsChosen.insert(GroupIndexes[Groups[i]]); }
843 string name; int oldTotal;
// on interruption, close both files and remove the partial output
846 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
848 in >> name; m->gobble(in); in >> oldTotal; m->gobble(in);
849 if (m->debug) { m->mothurOut("[DEBUG]: " + name + '\t' + toString(oldTotal) + "\n"); }
// only rows of selected sequences are kept
851 if (names.count(name) != 0) {
852 //if group info, then read it
853 vector<int> selectedCounts; int thisTotal = 0; int temp;
854 for (int i = 0; i < groups.size(); i++) {
855 int thisIndex = GroupIndexes[originalGroupIndexes[i]];
856 in >> temp; m->gobble(in);
857 if (indexOfGroupsChosen.count(thisIndex) != 0) { //we want this group
858 selectedCounts.push_back(temp); thisTotal += temp;
// the new total is the sum over the chosen groups only
862 out << name << '\t' << thisTotal << '\t';
863 for (int i = 0; i < selectedCounts.size(); i++) { out << selectedCounts[i] << '\t'; }
866 wroteSomething = true;
867 selectedCount+= thisTotal;
// sequence not selected: skip the rest of its row
868 }else { m->getline(in); }
875 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
876 outputTypes["count"].push_back(outputFileName); outputNames.push_back(outputFileName);
878 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your count file."); m->mothurOutEndLine();
882 catch(exception& e) {
883 m->errorOut(e, "GetGroupsCommand", "readCount");
887 //**********************************************************************************************************************
// Copies the rows of designfile whose first column is one of the selected Groups into a
// ".pick" design file.
888 int GetGroupsCommand::readDesign(){
// output goes to outputDir, or next to the input file when no outputDir was set
890 string thisOutputDir = outputDir;
891 if (outputDir == "") { thisOutputDir += m->hasPath(designfile); }
892 map<string, string> variables;
893 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(designfile));
894 variables["[extension]"] = m->getExtension(designfile);
895 string outputFileName = getOutputFileName("design", variables);
898 m->openOutputFile(outputFileName, out);
901 m->openInputFile(designfile, in);
904 bool wroteSomething = false;
905 int selectedCount = 0;
// on interruption, close both files and remove the partial output
908 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
910 in >> name; //read from first column
911 in >> group; //read from second column
913 //if the first-column value is one of the selected groups
914 if (m->inUsersGroups(name, Groups)) {
915 wroteSomething = true;
916 out << name << '\t' << group << endl;
925 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain groups from the groups you wish to get."); m->mothurOutEndLine(); }
926 outputTypes["design"].push_back(outputFileName); outputNames.push_back(outputFileName);
928 m->mothurOut("Selected " + toString(selectedCount) + " groups from your design file."); m->mothurOutEndLine();
933 catch(exception& e) {
934 m->errorOut(e, "GetGroupsCommand", "readDesign");
940 //**********************************************************************************************************************
// Copies the taxonomy rows of the selected sequences from taxfile into a ".pick" taxonomy
// file. A row is written when its name is in `names`; a name found in uniqueToRedundant is
// written under its replacement name instead.
941 int GetGroupsCommand::readTax(){
// output goes to outputDir, or next to the input file when no outputDir was set
943 string thisOutputDir = outputDir;
944 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
945 map<string, string> variables;
946 variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(taxfile));
947 variables["[extension]"] = m->getExtension(taxfile);
948 string outputFileName = getOutputFileName("taxonomy", variables);
951 m->openOutputFile(outputFileName, out);
954 m->openInputFile(taxfile, in);
957 bool wroteSomething = false;
// on interruption, close both files and remove the partial output
960 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
962 in >> name; //read from first column
963 in >> tax; //read from second column
965 //if this sequence belongs to one of the selected groups
966 if (names.count(name) != 0) {
967 wroteSomething = true;
968 out << name << '\t' << tax << endl;
970 //if the name is not among the selected sequences, check if it is a name that needs to be changed
971 map<string, string>::iterator it = uniqueToRedundant.find(name);
972 if (it != uniqueToRedundant.end()) {
973 wroteSomething = true;
974 out << it->second << '\t' << tax << endl;
983 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
984 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
988 catch(exception& e) {
989 m->errorOut(e, "GetGroupsCommand", "readTax");
993 //**********************************************************************************************************************
// Adds to `names` every sequence known to groupMap whose group is one of the selected
// Groups. Returns 0 early when m->control_pressed is set.
994 int GetGroupsCommand::fillNames(){
996 vector<string> seqs = groupMap->getNamesSeqs();
998 for (int i = 0; i < seqs.size(); i++) {
1000 if (m->control_pressed) { return 0; }
// look up the group this sequence is assigned to
1002 string group = groupMap->getGroup(seqs[i]);
1004 if (m->inUsersGroups(group, Groups)) {
1005 names.insert(seqs[i]);
1011 catch(exception& e) {
1012 m->errorOut(e, "GetGroupsCommand", "fillNames");
1017 //**********************************************************************************************************************