5 * Created by westcott on 11/10/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "getgroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
// Declares the parameters that get.groups accepts. Each CommandParameter is appended
// to `parameters`, and the name of every registered parameter is then copied into myArray.
// NOTE(review): the return statement is not among the lines visible here; presumably
// myArray is what gets returned (the return type is vector<string>) - confirm.
17 vector<string> GetGroupsCommand::setParameters(){
// Input-file parameters (second argument "InputTypes"). The meaning of the remaining
// positional arguments is defined by CommandParameter, which is declared elsewhere.
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
20 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
22 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
24 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
25 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
// Plain string options (second argument "String"): the groups to select and the
// input/output directories.
26 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Collect only the names of the parameters registered above.
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Exceptions are reported through m->errorOut, tagged with the class and method name.
35 m->errorOut(e, "GetGroupsCommand", "setParameters");
39 //**********************************************************************************************************************
// Assembles the user-facing help text for get.groups by appending one sentence per line
// to helpString.
// NOTE(review): the text below contains typos ("specfic", "in the those"). They are
// runtime strings shown to the user, so they are left untouched here; fix them in a
// separate behavior-changing edit.
// NOTE(review): the return statement is not among the lines visible here; presumably
// helpString is returned - confirm.
40 string GetGroupsCommand::getHelpString(){
42 string helpString = "";
// What the command does and what it writes.
43 helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy or shared file.\n";
44 helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n";
// Parameter overview: which inputs are accepted and which are required.
45 helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared and groups. The group parameter is required, unless you have a current group file, or are using a shared file.\n";
46 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n";
// Call format and examples.
48 helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
49 helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
50 helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
51 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// Exceptions are reported through m->errorOut, tagged with the class and method name.
55 m->errorOut(e, "GetGroupsCommand", "getHelpString");
59 //**********************************************************************************************************************
// Default constructor. Marks the command as aborted with calledHelp set, and registers
// an empty file list in outputTypes for every output type this command can produce.
60 GetGroupsCommand::GetGroupsCommand(){
// With both flags true, execute() returns 0 immediately without doing any work.
62 abort = true; calledHelp = true;
// One empty vector is reused to create the entry for each output type.
64 vector<string> tempOutNames;
65 outputTypes["fasta"] = tempOutNames;
66 outputTypes["taxonomy"] = tempOutNames;
67 outputTypes["name"] = tempOutNames;
68 outputTypes["group"] = tempOutNames;
69 outputTypes["list"] = tempOutNames;
70 outputTypes["shared"] = tempOutNames;
// Exceptions are reported through m->errorOut, tagged with the class and method name.
73 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
77 //**********************************************************************************************************************
// Constructs the command from the user's option string: handles "help"/"citation",
// validates the supplied parameter names, resolves input file paths against inputdir,
// reads each file parameter, falls back to the current group/shared file where needed,
// and sets abort = true when a required input is missing or cannot be opened.
// NOTE(review): several structural lines (else branches, closing braces) are not visible
// in this listing, so the comments below describe individual statements rather than the
// exact nesting - confirm against the full file.
78 GetGroupsCommand::GetGroupsCommand(string option) {
80 abort = false; calledHelp = false;
82 //allow user to run help
83 if(option == "help") { help(); abort = true; calledHelp = true; }
84 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of the parameters this command accepts, used for validation below.
87 vector<string> myArray = setParameters();
// Parse the option string into name -> value pairs. This is a local map with the same
// name as the container that setParameters() fills.
89 OptionParser parser(option);
90 map<string,string> parameters = parser.getParameters();
92 ValidParameters validParameter;
93 map<string,string>::iterator it;
95 //check to make sure all parameters are valid for command
96 for (it = parameters.begin(); it != parameters.end(); it++) {
97 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["fasta"] = tempOutNames;
103 outputTypes["taxonomy"] = tempOutNames;
104 outputTypes["name"] = tempOutNames;
105 outputTypes["group"] = tempOutNames;
106 outputTypes["list"] = tempOutNames;
107 outputTypes["shared"] = tempOutNames;
110 //if the user changes the output directory command factory will send this info to us in the outputdir parameter
111 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
113 //if the user changes the input directory command factory will send this info to us in the inputdir parameter
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter the user supplied: if the value has no path component
// (m->hasPath returns ""), prefix it with inputDir.
118 it = parameters.find("fasta");
119 //user has given a fasta file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["fasta"] = inputDir + it->second; }
126 it = parameters.find("accnos");
127 //user has given an accnos file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["accnos"] = inputDir + it->second; }
134 it = parameters.find("list");
135 //user has given a list file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["list"] = inputDir + it->second; }
142 it = parameters.find("name");
143 //user has given a name file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
150 it = parameters.find("group");
151 //user has given a group file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["group"] = inputDir + it->second; }
158 it = parameters.find("taxonomy");
159 //user has given a taxonomy file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
166 it = parameters.find("shared");
167 //user has given a shared file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["shared"] = inputDir + it->second; }
176 //check for required parameters
// Pattern used for every file parameter below: validFile's result is compared with the
// sentinel strings "not open" (abort) and "not found" (treated as not supplied, stored
// as ""); any other value is kept and also recorded as the current file of that type.
177 accnosfile = validParameter.validFile(parameters, "accnos", true);
178 if (accnosfile == "not open") { abort = true; }
179 else if (accnosfile == "not found") { accnosfile = ""; }
180 else { m->setAccnosFile(accnosfile); }
182 fastafile = validParameter.validFile(parameters, "fasta", true);
183 if (fastafile == "not open") { abort = true; }
184 else if (fastafile == "not found") { fastafile = ""; }
185 else { m->setFastaFile(fastafile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 listfile = validParameter.validFile(parameters, "list", true);
193 if (listfile == "not open") { abort = true; }
194 else if (listfile == "not found") { listfile = ""; }
195 else { m->setListFile(listfile); }
197 taxfile = validParameter.validFile(parameters, "taxonomy", true);
198 if (taxfile == "not open") { abort = true; }
199 else if (taxfile == "not found") { taxfile = ""; }
200 else { m->setTaxonomyFile(taxfile); }
// groups is a plain string option; it is split at dashes into Groups, which is also
// handed to m->setGroups.
202 groups = validParameter.validFile(parameters, "groups", false);
203 if (groups == "not found") { groups = ""; }
205 m->splitAtDash(groups, Groups);
206 m->setGroups(Groups);
// Unlike the files above, shared and group are reset to "" when they cannot be opened,
// so the emptiness checks that follow treat them as missing.
209 sharedfile = validParameter.validFile(parameters, "shared", true);
210 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
211 else if (sharedfile == "not found") { sharedfile = ""; }
212 else { m->setSharedFile(sharedfile); }
214 groupfile = validParameter.validFile(parameters, "group", true);
215 if (groupfile == "not open") { groupfile = ""; abort = true; }
216 else if (groupfile == "not found") { groupfile = ""; }
217 else { m->setGroupFile(groupfile); }
// Neither shared nor group was supplied: try the current files remembered by m.
219 if ((sharedfile == "") && (groupfile == "")) {
220 //is there a current file available for any of these?
221 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
222 //give priority to group, then shared
223 groupfile = m->getGroupFile();
224 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
226 sharedfile = m->getSharedFile();
227 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
229 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
233 //give priority to shared, then group
234 sharedfile = m->getSharedFile();
235 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
237 groupfile = m->getGroupFile();
238 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
240 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
// Final sanity checks: the groups to select must come from an accnos file or the groups
// parameter, at least one input file must be present, and any sequence-level file
// (fasta, name, list, taxonomy) needs a group file.
246 if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
248 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
249 if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
// Exceptions are reported through m->errorOut, tagged with the class and method name.
254 catch(exception& e) {
255 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
259 //**********************************************************************************************************************
// Runs the command: loads the requested groups, validates them against the group file,
// filters every supplied input file, lists the output files, and makes the first output
// file of each type the current file of that type.
// Returns 0 when the command was aborted because help/citation was requested or when
// the user interrupted the run (m->control_pressed); returns 2 when aborted for any
// other reason.
// NOTE(review): the call that fills `names` and the final return are not among the
// lines visible here - confirm against the full file.
261 int GetGroupsCommand::execute(){
264 if (abort == true) { if (calledHelp) { return 0; } return 2; }
266 //read the groups you want to get from the accnos file
267 if (accnosfile != "") { readAccnos(); }
269 if (groupfile != "") {
// NOTE(review): groupMap is allocated with raw new and no matching delete is visible
// in this listing - confirm it is released.
270 groupMap = new GroupMap(groupfile);
273 //make sure groups are valid
274 //takes care of user setting groupNames that are invalid or setting groups=all
// NOTE(review): util is allocated with raw new and no matching delete is visible in
// this listing - confirm it is released.
275 SharedUtil* util = new SharedUtil();
276 vector<string> gNamesOfGroups = groupMap->getNamesOfGroups();
277 util->setGroups(Groups, gNamesOfGroups);
278 groupMap->setNamesOfGroups(gNamesOfGroups);
281 //fill names with names of sequences that are from the groups we want to get
287 if (m->control_pressed) { return 0; }
289 //read through the correct file and output lines you want to keep
290 if (namefile != "") { readName(); }
291 if (fastafile != "") { readFasta(); }
292 if (groupfile != "") { readGroup(); }
293 if (listfile != "") { readList(); }
294 if (taxfile != "") { readTax(); }
295 if (sharedfile != "") { readShared(); }
// On interruption, delete every output file written so far before returning.
297 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// Report the output files to the user.
300 if (outputNames.size() != 0) {
301 m->mothurOutEndLine();
302 m->mothurOut("Output File names: "); m->mothurOutEndLine();
303 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
304 m->mothurOutEndLine();
306 //set fasta file as new current fastafile
// The same pattern repeats for each output type: if at least one file of that type was
// produced, the first one becomes the current file of that type.
308 itTypes = outputTypes.find("fasta");
309 if (itTypes != outputTypes.end()) {
310 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
313 itTypes = outputTypes.find("name");
314 if (itTypes != outputTypes.end()) {
315 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
318 itTypes = outputTypes.find("group");
319 if (itTypes != outputTypes.end()) {
320 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
323 itTypes = outputTypes.find("list");
324 if (itTypes != outputTypes.end()) {
325 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
328 itTypes = outputTypes.find("taxonomy");
329 if (itTypes != outputTypes.end()) {
330 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
333 itTypes = outputTypes.find("shared");
334 if (itTypes != outputTypes.end()) {
335 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// Exceptions are reported through m->errorOut, tagged with the class and method name.
342 catch(exception& e) {
343 m->errorOut(e, "GetGroupsCommand", "execute");
348 //**********************************************************************************************************************
// Filters the fasta file: sequences whose name is present in `names` are printed to the
// output file. The output file is registered under the "fasta" output type and the
// number of selected sequences is reported.
// Returns 0 after cleaning up when the user interrupts the run.
// NOTE(review): the read loop header, the selectedCount increment and the final return
// are not among the lines visible here - confirm against the full file.
349 int GetGroupsCommand::readFasta(){
// Output goes to outputDir, or next to the input file when outputDir is empty. The name
// is the input's root name followed by "pick" and the input's extension.
351 string thisOutputDir = outputDir;
352 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
353 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
356 m->openOutputFile(outputFileName, out);
359 m->openInputFile(fastafile, in);
362 bool wroteSomething = false;
363 int selectedCount = 0;
// On interruption close both streams and delete the partial output file.
366 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
// Read the next sequence from the input stream.
368 Sequence currSeq(in);
369 name = currSeq.getName();
372 //if this name belongs to one of the selected groups (it is in names)
373 if (names.count(name) != 0) {
374 wroteSomething = true;
376 currSeq.printSequence(out);
385 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
386 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
388 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
// Exceptions are reported through m->errorOut, tagged with the class and method name.
394 catch(exception& e) {
395 m->errorOut(e, "GetGroupsCommand", "readFasta");
399 //**********************************************************************************************************************
// Rewrites the shared file: for every set of vectors returned by InputData, a separate
// output file named <root><label>.pick<ext> is written, containing the headers followed
// by one line (label, group, abundances) per vector. Each output file is registered under
// the "shared" output type.
// NOTE(review): no filtering by group is visible in this function; presumably InputData
// honors the groups registered through m->setGroups - confirm.
// NOTE(review): the final return is not among the lines visible here.
400 int GetGroupsCommand::readShared(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
402 string thisOutputDir = outputDir;
403 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
405 InputData input(sharedfile, "sharedfile");
406 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
408 bool wroteSomething = false;
// Loop until InputData signals the end of the file with a NULL first element.
410 while(lookup[0] != NULL) {
412 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".pick" + m->getExtension(sharedfile);
414 m->openOutputFile(outputFileName, out);
415 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
// On interruption close the stream, delete the partial file and free the vectors.
417 if (m->control_pressed) { out.close(); m->mothurRemove(outputFileName); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
419 lookup[0]->printHeaders(out);
421 for (int i = 0; i < lookup.size(); i++) {
422 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
423 lookup[i]->print(out);
424 wroteSomething = true;
428 //get next line to process
429 //prevent memory leak
430 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
431 lookup = input.getSharedRAbundVectors();
// NOTE(review): the wording "groups you wish to remove" looks copied from the
// remove-groups command; this command selects groups. It is a runtime string, so it is
// left untouched here.
436 if (wroteSomething == false) { m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine(); }
// Build a comma-separated list of the selected groups for the summary message.
// NOTE(review): if Groups is empty, Groups.size()-1 wraps around (size() is unsigned for
// standard containers) and Groups[Groups.size()-1] reads out of range - confirm that
// Groups can never be empty here.
438 string groupsString = "";
439 for (int i = 0; i < Groups.size()-1; i++) { groupsString += Groups[i] + ", "; }
440 groupsString += Groups[Groups.size()-1];
442 m->mothurOut("Selected groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
// Exceptions are reported through m->errorOut, tagged with the class and method name.
447 catch(exception& e) {
448 m->errorOut(e, "GetGroupsCommand", "readShared");
452 //**********************************************************************************************************************
// Filters the list file: within every bin only the names present in `names` are kept, and
// bins that end up empty are dropped from the new list. The output file is registered
// under the "list" output type and the number of selected sequences is reported.
// Returns 0 after cleaning up when the user interrupts the run.
// NOTE(review): the stream/list declarations, the outer read loop, the code that prints
// newList and the final return are not among the lines visible here - confirm against
// the full file.
453 int GetGroupsCommand::readList(){
// Output goes to outputDir, or next to the input file when outputDir is empty. The name
// is the input's root name followed by "pick" and the input's extension.
455 string thisOutputDir = outputDir;
456 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
457 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
460 m->openOutputFile(outputFileName, out);
463 m->openInputFile(listfile, in);
465 bool wroteSomething = false;
466 int selectedCount = 0;
472 //read in list vector
475 //make a new list vector
477 newList.setLabel(list.getLabel());
480 for (int i = 0; i < list.getNumBins(); i++) {
481 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
483 //parse out names that belong to the selected groups (present in names)
484 string binnames = list.get(i);
486 string newNames = "";
// Peel one comma-separated name off the front of binnames per iteration.
// NOTE(review): the comparison against -1 relies on conversion to the unsigned
// string::npos value; string::npos is the idiomatic spelling.
487 while (binnames.find_first_of(',') != -1) {
488 string name = binnames.substr(0,binnames.find_first_of(','));
489 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
491 //if that name is in names, add it
492 if (names.count(name) != 0) { newNames += name + ","; selectedCount++; }
// What remains in binnames after the loop is the last name of the bin.
496 if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; }
498 //if there are names in this bin add to new list
499 if (newNames != "") {
500 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
501 newList.push_back(newNames);
505 //print new listvector
506 if (newList.getNumBins() != 0) {
507 wroteSomething = true;
516 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
517 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
519 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
// Exceptions are reported through m->errorOut, tagged with the class and method name.
524 catch(exception& e) {
525 m->errorOut(e, "GetGroupsCommand", "readList");
529 //**********************************************************************************************************************
// Filters the name file: for every row the comma-separated names of the second column
// are reduced to those present in `names`. A row is written when its first-column name
// is in `names`, or otherwise when at least one second-column name survives. The output
// file is registered under the "name" output type and the number of selected sequences
// is reported.
// Returns 0 after cleaning up when the user interrupts the run.
// NOTE(review): the read loop header, the read of secondCol, the else branch between the
// two write paths and the final return are not among the lines visible here - confirm
// against the full file.
530 int GetGroupsCommand::readName(){
// Output goes to outputDir, or next to the input file when outputDir is empty. The name
// is the input's root name followed by "pick" and the input's extension.
532 string thisOutputDir = outputDir;
533 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
534 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
537 m->openOutputFile(outputFileName, out);
540 m->openInputFile(namefile, in);
541 string name, firstCol, secondCol;
543 bool wroteSomething = false;
544 int selectedCount = 0;
547 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
549 in >> firstCol; m->gobble(in);
// Split the second column at commas and keep only the names found in `names`.
552 vector<string> parsedNames;
553 m->splitAtComma(secondCol, parsedNames);
555 vector<string> validSecond; validSecond.clear();
556 for (int i = 0; i < parsedNames.size(); i++) {
557 if (names.count(parsedNames[i]) != 0) {
558 validSecond.push_back(parsedNames[i]);
562 selectedCount += validSecond.size();
564 //if the name in the first column is in the set then print it and any other names in second column also in set
565 if (names.count(firstCol) != 0) {
567 wroteSomething = true;
569 out << firstCol << '\t';
571 //you know you have at least one valid second since first column is valid
// NOTE(review): this holds only if the first-column name is repeated in the second
// column; otherwise validSecond could be empty and validSecond.size()-1 would wrap
// around - confirm the file format guarantees it.
572 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
573 out << validSecond[validSecond.size()-1] << endl;
575 //make first name in set you come to first column and then add the remaining names to second column
578 //you want part of this row
579 if (validSecond.size() != 0) {
581 wroteSomething = true;
583 out << validSecond[0] << '\t';
585 //validSecond is non-empty here (checked above); the loop starts at 0, so validSecond[0] is repeated in the second column
586 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
587 out << validSecond[validSecond.size()-1] << endl;
596 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
597 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
599 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
// Exceptions are reported through m->errorOut, tagged with the class and method name.
603 catch(exception& e) {
604 m->errorOut(e, "GetGroupsCommand", "readName");
609 //**********************************************************************************************************************
// Filters the group file: rows whose sequence name is present in `names` are copied to
// the output file as "name<TAB>group". The output file is registered under the "group"
// output type and the number of selected sequences is reported.
// Returns 0 after cleaning up when the user interrupts the run.
// NOTE(review): the read loop header, the selectedCount increment and the final return
// are not among the lines visible here - confirm against the full file.
610 int GetGroupsCommand::readGroup(){
// Output goes to outputDir, or next to the input file when outputDir is empty. The name
// is the input's root name followed by "pick" and the input's extension.
612 string thisOutputDir = outputDir;
613 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
614 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
617 m->openOutputFile(outputFileName, out);
620 m->openInputFile(groupfile, in);
623 bool wroteSomething = false;
624 int selectedCount = 0;
// On interruption close both streams and delete the partial output file.
627 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
629 in >> name; //read from first column
630 in >> group; //read from second column
632 //if this name belongs to one of the selected groups (it is in names)
633 if (names.count(name) != 0) {
634 wroteSomething = true;
635 out << name << '\t' << group << endl;
644 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
645 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
647 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
// Exceptions are reported through m->errorOut, tagged with the class and method name.
651 catch(exception& e) {
652 m->errorOut(e, "GetGroupsCommand", "readGroup");
656 //**********************************************************************************************************************
// Filters the taxonomy file: rows whose sequence name is present in `names` are copied
// to the output file as "name<TAB>taxonomy". The output file is registered under the
// "taxonomy" output type.
// Returns 0 after cleaning up when the user interrupts the run.
// NOTE(review): the read loop header and the final return are not among the lines
// visible here - confirm against the full file.
657 int GetGroupsCommand::readTax(){
// Output goes to outputDir, or next to the input file when outputDir is empty. The name
// is the input's root name followed by "pick" and the input's extension.
659 string thisOutputDir = outputDir;
660 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
661 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
663 m->openOutputFile(outputFileName, out);
666 m->openInputFile(taxfile, in);
669 bool wroteSomething = false;
// On interruption close both streams and delete the partial output file.
672 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
674 in >> name; //read from first column
675 in >> tax; //read from second column
677 //if this name belongs to one of the selected groups (it is in names)
678 if (names.count(name) != 0) {
679 wroteSomething = true;
680 out << name << '\t' << tax << endl;
688 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
689 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
// Exceptions are reported through m->errorOut, tagged with the class and method name.
693 catch(exception& e) {
694 m->errorOut(e, "GetGroupsCommand", "readTax");
698 //**********************************************************************************************************************
// Loads the groups to select from the accnos file: each name read is appended to Groups,
// and the resulting list is handed to m->setGroups.
// NOTE(review): the stream declaration and the read loop are not among the lines visible
// here - confirm against the full file.
699 void GetGroupsCommand::readAccnos(){
704 m->openInputFile(accnosfile, in);
710 Groups.push_back(name);
716 m->setGroups(Groups);
// Exceptions are reported through m->errorOut, tagged with the class and method name.
719 catch(exception& e) {
720 m->errorOut(e, "GetGroupsCommand", "readAccnos");
724 //**********************************************************************************************************************
// Populates `names` with every sequence in groupMap whose group is one of the groups in
// Groups (as decided by m->inUsersGroups).
// Returns 0 early when the user interrupts the run.
// NOTE(review): the final return is not among the lines visible here.
725 int GetGroupsCommand::fillNames(){
727 vector<string> seqs = groupMap->getNamesSeqs();
729 for (int i = 0; i < seqs.size(); i++) {
731 if (m->control_pressed) { return 0; }
// Look up the group of this sequence and keep the sequence if the group was selected.
733 string group = groupMap->getGroup(seqs[i]);
735 if (m->inUsersGroups(group, Groups)) {
736 names.insert(seqs[i]);
// Exceptions are reported through m->errorOut, tagged with the class and method name.
742 catch(exception& e) {
743 m->errorOut(e, "GetGroupsCommand", "fillNames");
748 //**********************************************************************************************************************