5 * Created by westcott on 11/10/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "getgroupscommand.h"
11 #include "sequence.hpp"
12 #include "listvector.hpp"
13 #include "sharedutilities.h"
14 #include "inputdata.h"
16 //**********************************************************************************************************************
// Registers every parameter accepted by this command in `parameters` and gathers
// the parameter names into myArray.
// NOTE(review): the embedded numbering skips lines here (18, 29, 32-34, 36-38), so the
// try block opener, the return statement and the closing braces are not visible in this
// excerpt -- presumably myArray is what gets returned; confirm against the full file.
17 vector<string> GetGroupsCommand::setParameters(){
// File inputs, all declared with type "InputTypes". shared and group are tagged
// "FNGLT-sharedGroup", the other data files "FNGLT", and accnos "none"; the meaning of
// each positional CommandParameter argument is defined by that class, not shown here.
19 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pfasta);
20 CommandParameter pshared("shared", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pshared);
21 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
22 CommandParameter pgroup("group", "InputTypes", "", "", "none", "FNGLT-sharedGroup", "none",false,false); parameters.push_back(pgroup);
23 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
24 CommandParameter ptaxonomy("taxonomy", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(ptaxonomy);
25 CommandParameter paccnos("accnos", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(paccnos);
// Plain "String" parameters: the group selection and the input/output directories.
26 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
27 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
28 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// Copy just the names of the registered parameters.
30 vector<string> myArray;
31 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
// Error report for this function (the enclosing catch line is not visible in this excerpt).
35 m->errorOut(e, "GetGroupsCommand", "setParameters");
39 //**********************************************************************************************************************
// Assembles the user-facing help text for get.groups by appending one sentence at a
// time to helpString.
// NOTE(review): the embedded numbering skips 41, 52-54 and 56-58, so the try opener,
// the return statement and the closing braces are not visible in this excerpt.
// NOTE(review): the runtime text below contains typos ("specfic", "in the those");
// they are left untouched here because they are program output, and should be fixed
// in a separate code change.
40 string GetGroupsCommand::getHelpString(){
42 string helpString = "";
// What the command does and what it writes.
43 helpString += "The get.groups command selects sequences from a specfic group or set of groups from the following file types: fasta, name, group, list, taxonomy or shared file.\n";
44 helpString += "It outputs a file containing the sequences in the those specified groups, or a sharedfile containing only those groups.\n";
// Parameter list and which inputs are required.
45 helpString += "The get.groups command parameters are accnos, fasta, name, group, list, taxonomy, shared and groups. The group parameter is required, unless you have a current group file, or are using a shared file.\n";
46 helpString += "You must also provide an accnos containing the list of groups to get or set the groups parameter to the groups you wish to select.\n";
47 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like. You can separate group names with dashes.\n";
// Usage format and examples.
48 helpString += "The get.groups command should be in the following format: get.groups(accnos=yourAccnos, fasta=yourFasta, group=yourGroupFile).\n";
49 helpString += "Example get.groups(accnos=amazon.accnos, fasta=amazon.fasta, group=amazon.groups).\n";
50 helpString += "or get.groups(groups=pasture, fasta=amazon.fasta, group=amazon.groups).\n";
51 helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
// Error report for this function (the enclosing catch line is not visible in this excerpt).
55 m->errorOut(e, "GetGroupsCommand", "getHelpString");
59 //**********************************************************************************************************************
// Default constructor: flags the command as aborted with help called, and registers
// an empty file-name list for each output type this command can produce.
// NOTE(review): the embedded numbering skips 61, 63, 71-72 and 74-76, so the try/catch
// scaffolding and the closing braces are not visible in this excerpt.
60 GetGroupsCommand::GetGroupsCommand(){
62 abort = true; calledHelp = true;
// One empty vector is copied into outputTypes under each output-type key.
64 vector<string> tempOutNames;
65 outputTypes["fasta"] = tempOutNames;
66 outputTypes["taxonomy"] = tempOutNames;
67 outputTypes["name"] = tempOutNames;
68 outputTypes["group"] = tempOutNames;
69 outputTypes["list"] = tempOutNames;
70 outputTypes["shared"] = tempOutNames;
// Error report for this constructor (the enclosing catch line is not visible in this excerpt).
73 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
77 //**********************************************************************************************************************
// Builds the command from the user's option string: serves "help"/"citation",
// validates the parameter names, resolves each input file, and sets abort whenever
// a supplied file cannot be opened or a required input is missing.
// NOTE(review): the embedded numbering in this excerpt skips many lines (e.g. 85-86,
// 98-99, 116-117, 124-125, 225, 228, 230-232, 236, 239, 241-245), so the else branches,
// the declaration of `path`, and the closing braces are not visible here. Read the
// control flow below with that in mind and confirm against the full file.
78 GetGroupsCommand::GetGroupsCommand(string option) {
79 abort = false; calledHelp = false;
82 //allow user to run help
83 if(option == "help") { help(); abort = true; calledHelp = true; }
84 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
// Names of every parameter this command accepts.
87 vector<string> myArray = setParameters();
// Parse the option string into name -> value pairs. This local map is named
// `parameters`; setParameters() calls push_back on a `parameters` too, so that one is
// presumably a member which this local hides -- TODO confirm in the header.
89 OptionParser parser(option);
90 map<string,string> parameters = parser.getParameters();
92 ValidParameters validParameter;
93 map<string,string>::iterator it;
95 //check to make sure all parameters are valid for command
96 for (it = parameters.begin(); it != parameters.end(); it++) {
97 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["fasta"] = tempOutNames;
103 outputTypes["taxonomy"] = tempOutNames;
104 outputTypes["name"] = tempOutNames;
105 outputTypes["group"] = tempOutNames;
106 outputTypes["list"] = tempOutNames;
107 outputTypes["shared"] = tempOutNames;
110 //output directory chosen by the user; empty when the parameter was not given
111 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
113 //input directory chosen by the user; empty when the parameter was not given
114 string inputDir = validParameter.validFile(parameters, "inputdir", false);
115 if (inputDir == "not found"){ inputDir = ""; }
// For each file parameter below: when the user's value has no path component,
// prefix it with inputDir.
118 it = parameters.find("fasta");
119 //user supplied a fasta file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["fasta"] = inputDir + it->second; }
126 it = parameters.find("accnos");
127 //user supplied an accnos file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["accnos"] = inputDir + it->second; }
134 it = parameters.find("list");
135 //user supplied a list file
136 if(it != parameters.end()){
137 path = m->hasPath(it->second);
138 //if the user has not given a path then, add inputdir. else leave path alone.
139 if (path == "") { parameters["list"] = inputDir + it->second; }
142 it = parameters.find("name");
143 //user supplied a name file
144 if(it != parameters.end()){
145 path = m->hasPath(it->second);
146 //if the user has not given a path then, add inputdir. else leave path alone.
147 if (path == "") { parameters["name"] = inputDir + it->second; }
150 it = parameters.find("group");
151 //user supplied a group file
152 if(it != parameters.end()){
153 path = m->hasPath(it->second);
154 //if the user has not given a path then, add inputdir. else leave path alone.
155 if (path == "") { parameters["group"] = inputDir + it->second; }
158 it = parameters.find("taxonomy");
159 //user supplied a taxonomy file
160 if(it != parameters.end()){
161 path = m->hasPath(it->second);
162 //if the user has not given a path then, add inputdir. else leave path alone.
163 if (path == "") { parameters["taxonomy"] = inputDir + it->second; }
166 it = parameters.find("shared");
167 //user supplied a shared file
168 if(it != parameters.end()){
169 path = m->hasPath(it->second);
170 //if the user has not given a path then, add inputdir. else leave path alone.
171 if (path == "") { parameters["shared"] = inputDir + it->second; }
176 //resolve each input file: "not open" aborts, "not found" means the parameter was
//omitted (stored as ""), anything else is recorded as the current file of that type
177 accnosfile = validParameter.validFile(parameters, "accnos", true);
178 if (accnosfile == "not open") { abort = true; }
179 else if (accnosfile == "not found") { accnosfile = ""; }
180 else { m->setAccnosFile(accnosfile); }
182 fastafile = validParameter.validFile(parameters, "fasta", true);
183 if (fastafile == "not open") { abort = true; }
184 else if (fastafile == "not found") { fastafile = ""; }
185 else { m->setFastaFile(fastafile); }
187 namefile = validParameter.validFile(parameters, "name", true);
188 if (namefile == "not open") { abort = true; }
189 else if (namefile == "not found") { namefile = ""; }
190 else { m->setNameFile(namefile); }
192 listfile = validParameter.validFile(parameters, "list", true);
193 if (listfile == "not open") { abort = true; }
194 else if (listfile == "not found") { listfile = ""; }
195 else { m->setListFile(listfile); }
197 taxfile = validParameter.validFile(parameters, "taxonomy", true);
198 if (taxfile == "not open") { abort = true; }
199 else if (taxfile == "not found") { taxfile = ""; }
200 else { m->setTaxonomyFile(taxfile); }
// groups is a plain string option; it is split on dashes into the Groups list.
202 groups = validParameter.validFile(parameters, "groups", false);
203 if (groups == "not found") { groups = ""; }
205 m->splitAtDash(groups, Groups);
// Unlike the files above, shared and group are also reset to "" when they cannot be
// opened, so the emptiness checks further down treat them as absent.
209 sharedfile = validParameter.validFile(parameters, "shared", true);
210 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
211 else if (sharedfile == "not found") { sharedfile = ""; }
212 else { m->setSharedFile(sharedfile); }
214 groupfile = validParameter.validFile(parameters, "group", true);
215 if (groupfile == "not open") { groupfile = ""; abort = true; }
216 else if (groupfile == "not found") { groupfile = ""; }
217 else { m->setGroupFile(groupfile); }
// Neither shared nor group was given: fall back to the session's current files.
// NOTE(review): the lines between the statements below are missing from this excerpt;
// the two "give priority" comments suggest an if/else chain in which the "no current
// groupfile or sharedfile" abort only runs when both lookups come back empty -- confirm.
219 if ((sharedfile == "") && (groupfile == "")) {
220 //is there are current file available for any of these?
221 if ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != "")) {
222 //give priority to group, then shared
223 groupfile = m->getGroupFile();
224 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
226 sharedfile = m->getSharedFile();
227 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
229 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
233 //give priority to shared, then group
234 sharedfile = m->getSharedFile();
235 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
237 groupfile = m->getGroupFile();
238 if (groupfile != "") { m->mothurOut("Using " + groupfile + " as input file for the group parameter."); m->mothurOutEndLine(); }
240 m->mothurOut("You have no current groupfile or sharedfile and one is required."); m->mothurOutEndLine(); abort = true;
// Final sanity checks: a group selection must exist (accnos file or groups option),
// at least one data file must be given, and sequence-level files need a group file.
246 if ((accnosfile == "") && (Groups.size() == 0)) { m->mothurOut("You must provide an accnos file or specify groups using the groups parameter."); m->mothurOutEndLine(); abort = true; }
248 if ((fastafile == "") && (namefile == "") && (groupfile == "") && (sharedfile == "") && (listfile == "") && (taxfile == "")) { m->mothurOut("You must provide at least one of the following: fasta, name, taxonomy, group, shared or list."); m->mothurOutEndLine(); abort = true; }
249 if ((groupfile == "") && ((namefile != "") || (fastafile != "") || (listfile != "") || (taxfile != ""))) { m->mothurOut("If using a fasta, name, taxonomy, group or list, then you must provide a group file."); m->mothurOutEndLine(); abort = true; }
// Any std::exception raised above is reported through m->errorOut.
254 catch(exception& e) {
255 m->errorOut(e, "GetGroupsCommand", "GetGroupsCommand");
259 //**********************************************************************************************************************
// Runs the command: loads the group selection, filters every supplied input file,
// lists the output files, and makes the first output of each type the current file.
// NOTE(review): the embedded numbering skips lines here (e.g. 271, 277, 280-283, 305,
// 335-339), so some statements, the declarations of `current` and `itTypes`, and the
// closing braces are not visible in this excerpt.
261 int GetGroupsCommand::execute(){
// Nothing to do when construction aborted: 0 if the user only asked for help, else 2.
264 if (abort == true) { if (calledHelp) { return 0; } return 2; }
266 //load the groups to select from the accnos file
267 if (accnosfile != "") { readAccnos(); }
269 if (groupfile != "") {
// NOTE(review): groupMap and util are allocated with new; no matching delete is
// visible in this excerpt (lines 277 and 281-283 are missing) -- confirm they are freed.
270 groupMap = new GroupMap(groupfile);
273 //make sure groups are valid
274 //takes care of user setting groupNames that are invalid or setting groups=all
275 SharedUtil* util = new SharedUtil();
276 util->setGroups(Groups, groupMap->namesOfGroups);
279 //fill names with names of sequences that are from the groups we want to select
//(the call itself is on a line not visible in this excerpt)
285 if (m->control_pressed) { return 0; }
287 //read through the correct file and output lines you want to keep
288 if (namefile != "") { readName(); }
289 if (fastafile != "") { readFasta(); }
290 if (groupfile != "") { readGroup(); }
291 if (listfile != "") { readList(); }
292 if (taxfile != "") { readTax(); }
293 if (sharedfile != "") { readShared(); }
// On interrupt, delete every output file written so far.
295 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
// Report the output files to the user.
298 if (outputNames.size() != 0) {
299 m->mothurOutEndLine();
300 m->mothurOut("Output File names: "); m->mothurOutEndLine();
301 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
302 m->mothurOutEndLine();
304 //for each output type that produced files, make the first one the new current file
306 itTypes = outputTypes.find("fasta");
307 if (itTypes != outputTypes.end()) {
308 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
311 itTypes = outputTypes.find("name");
312 if (itTypes != outputTypes.end()) {
313 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
316 itTypes = outputTypes.find("group");
317 if (itTypes != outputTypes.end()) {
318 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
321 itTypes = outputTypes.find("list");
322 if (itTypes != outputTypes.end()) {
323 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
326 itTypes = outputTypes.find("taxonomy");
327 if (itTypes != outputTypes.end()) {
328 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTaxonomyFile(current); }
331 itTypes = outputTypes.find("shared");
332 if (itTypes != outputTypes.end()) {
333 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setSharedFile(current); }
// Any std::exception raised above is reported through m->errorOut.
340 catch(exception& e) {
341 m->errorOut(e, "GetGroupsCommand", "execute");
346 //**********************************************************************************************************************
// Copies to a "pick" fasta file every sequence of fastafile whose name is in `names`.
// NOTE(review): the embedded numbering skips lines here (e.g. 352-353, 355-356, 358-359,
// 362-363, 368-369, 373, 375-382), so the stream declarations, the read loop header, any
// selectedCount increment, the stream closes and the return are not visible in this excerpt.
347 int GetGroupsCommand::readFasta(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
349 string thisOutputDir = outputDir;
350 if (outputDir == "") { thisOutputDir += m->hasPath(fastafile); }
351 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(fastafile)) + "pick" + m->getExtension(fastafile);
354 m->openOutputFile(outputFileName, out);
357 m->openInputFile(fastafile, in);
360 bool wroteSomething = false;
361 int selectedCount = 0;
// On interrupt: close both streams, delete the partial output, and stop.
364 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
// Read the next sequence record from the input stream.
366 Sequence currSeq(in);
367 name = currSeq.getName();
370 //if this sequence is in names (filled by fillNames() from the selected groups)
371 if (names.count(name) != 0) {
372 wroteSomething = true;
374 currSeq.printSequence(out);
// Warn when nothing matched; the output file is registered either way.
383 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
384 outputTypes["fasta"].push_back(outputFileName); outputNames.push_back(outputFileName);
386 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your fasta file."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut.
392 catch(exception& e) {
393 m->errorOut(e, "GetGroupsCommand", "readFasta");
397 //**********************************************************************************************************************
// Writes one ".pick" shared file per label read from sharedfile, printing every
// vector that InputData returns for that label.
// NOTE(review): the embedded numbering skips lines here (e.g. 399, 402, 411, 423-425,
// 430-433, 441-444), so the output stream declaration, its close, the return and the
// closing braces are not visible in this excerpt.
398 int GetGroupsCommand::readShared(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
400 string thisOutputDir = outputDir;
401 if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); }
// NOTE(review): no group filtering is visible in this function; presumably InputData
// restricts the vectors to the groups chosen earlier -- verify.
403 InputData input(sharedfile, "sharedfile");
404 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
406 bool wroteSomething = false;
// Loop until getSharedRAbundVectors() yields a NULL first entry.
408 while(lookup[0] != NULL) {
// The label is embedded in the file name, so each label gets its own output file.
410 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + ".pick" + m->getExtension(sharedfile);
412 m->openOutputFile(outputFileName, out);
413 outputTypes["shared"].push_back(outputFileName); outputNames.push_back(outputFileName);
// On interrupt: drop the partial file and free the vectors currently held.
415 if (m->control_pressed) { out.close(); m->mothurRemove(outputFileName); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
417 lookup[0]->printHeaders(out);
// One output row per vector: label, group, then the vector's own print output.
419 for (int i = 0; i < lookup.size(); i++) {
420 out << lookup[i]->getLabel() << '\t' << lookup[i]->getGroup() << '\t';
421 lookup[i]->print(out);
422 wroteSomething = true;
426 //get next line to process
427 //prevent memory leak
428 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
429 lookup = input.getSharedRAbundVectors();
// NOTE(review): this message speaks of groups "you wish to remove", which reads as if
// copied from a remove-style command; the other readers here say "wish to get".
434 if (wroteSomething == false) { m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine(); }
// Join the group names with ", " for the summary line.
// NOTE(review): Groups[Groups.size()-1] is not a valid element when Groups is empty,
// and if size() is unsigned (as for std::vector) the loop bound wraps instead of
// going negative -- confirm Groups can never be empty here.
436 string groupsString = "";
437 for (int i = 0; i < Groups.size()-1; i++) { groupsString += Groups[i] + ", "; }
438 groupsString += Groups[Groups.size()-1];
440 m->mothurOut("Selected groups: " + groupsString + " from your shared file."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut.
445 catch(exception& e) {
446 m->errorOut(e, "GetGroupsCommand", "readShared");
450 //**********************************************************************************************************************
// Rebuilds each list vector of listfile keeping only the sequence names found in
// `names`; bins left without any name are not added to the new list.
// NOTE(review): the embedded numbering skips lines here (e.g. 456-457, 459-460, 465-469,
// 471-472, 474, 476-477, 506-513), so the stream declarations, the outer read loop, the
// ListVector declarations, the print call and the return are not visible in this excerpt.
451 int GetGroupsCommand::readList(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
453 string thisOutputDir = outputDir;
454 if (outputDir == "") { thisOutputDir += m->hasPath(listfile); }
455 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "pick" + m->getExtension(listfile);
458 m->openOutputFile(outputFileName, out);
461 m->openInputFile(listfile, in);
463 bool wroteSomething = false;
464 int selectedCount = 0;
470 //read in list vector
473 //make a new list vector
475 newList.setLabel(list.getLabel());
478 for (int i = 0; i < list.getNumBins(); i++) {
// On interrupt: close both streams, delete the partial output, and stop.
479 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
481 //split this bin's comma-separated names and keep those present in names
482 string binnames = list.get(i);
484 string newNames = "";
// find_first_of returns std::string::npos when no comma is left; npos is defined as
// size_type(-1), which is why the comparison against -1 works here.
485 while (binnames.find_first_of(',') != -1) {
486 string name = binnames.substr(0,binnames.find_first_of(','));
487 binnames = binnames.substr(binnames.find_first_of(',')+1, binnames.length());
489 //if that name is in names, add it
490 if (names.count(name) != 0) { newNames += name + ","; selectedCount++; }
// binnames now holds the last (or only) name of the bin.
494 if (names.count(binnames) != 0) { newNames += binnames + ","; selectedCount++; }
496 //if there are names in this bin add to new list
497 if (newNames != "") {
498 newNames = newNames.substr(0, newNames.length()-1); //rip off extra comma
499 newList.push_back(newNames);
503 //print new listvector
504 if (newList.getNumBins() != 0) {
505 wroteSomething = true;
// Warn when nothing matched; the output file is registered either way.
514 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
515 outputTypes["list"].push_back(outputFileName); outputNames.push_back(outputFileName);
517 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your list file."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut.
522 catch(exception& e) {
523 m->errorOut(e, "GetGroupsCommand", "readList");
527 //**********************************************************************************************************************
// Filters namefile row by row: second-column names found in `names` are kept, and
// a row is written whenever the first-column name or any second-column name survives.
// NOTE(review): the embedded numbering skips lines here (e.g. 533-534, 536-537, 543-544,
// 548-549, 557-559, 572, 574-575, 586-593), so the stream declarations, the read loop
// header, the read of secondCol, the else branch and the return are not visible in
// this excerpt.
528 int GetGroupsCommand::readName(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
530 string thisOutputDir = outputDir;
531 if (outputDir == "") { thisOutputDir += m->hasPath(namefile); }
532 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(namefile)) + "pick" + m->getExtension(namefile);
535 m->openOutputFile(outputFileName, out);
538 m->openInputFile(namefile, in);
539 string name, firstCol, secondCol;
541 bool wroteSomething = false;
542 int selectedCount = 0;
// On interrupt: close both streams, delete the partial output, and stop.
545 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
547 in >> firstCol; m->gobble(in);
// Split the comma-separated second column and keep only the names present in names.
550 vector<string> parsedNames;
551 m->splitAtComma(secondCol, parsedNames);
553 vector<string> validSecond; validSecond.clear();
554 for (int i = 0; i < parsedNames.size(); i++) {
555 if (names.count(parsedNames[i]) != 0) {
556 validSecond.push_back(parsedNames[i]);
// The tally counts every kept second-column name, whether or not a row is written below.
560 selectedCount += validSecond.size();
562 //if the name in the first column is in the set then print it and any other names in second column also in set
563 if (names.count(firstCol) != 0) {
565 wroteSomething = true;
567 out << firstCol << '\t';
569 //you know you have at least one valid second since first column is valid
// NOTE(review): that holds only if the first-column name is always repeated in the
// second column; otherwise validSecond could be empty and size()-1 is not a valid
// index -- confirm against the name file format.
570 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
571 out << validSecond[validSecond.size()-1] << endl;
573 //make first name in set you come to first column and then add the remaining names to second column
576 //you want part of this row
577 if (validSecond.size() != 0) {
579 wroteSomething = true;
581 out << validSecond[0] << '\t';
583 //validSecond is non-empty here (checked by the enclosing if), so size()-1 is a valid index
584 for (int i = 0; i < validSecond.size()-1; i++) { out << validSecond[i] << ','; }
585 out << validSecond[validSecond.size()-1] << endl;
// Warn when nothing matched; the output file is registered either way.
594 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
595 outputTypes["name"].push_back(outputFileName); outputNames.push_back(outputFileName);
597 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your name file."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut.
601 catch(exception& e) {
602 m->errorOut(e, "GetGroupsCommand", "readName");
607 //**********************************************************************************************************************
// Copies to a "pick" group file every name/group row of groupfile whose sequence
// name is in `names`.
// NOTE(review): the embedded numbering skips lines here (e.g. 613-614, 616-617, 619-620,
// 623-624, 634-641), so the stream and variable declarations, the read loop header, any
// selectedCount increment, the stream closes and the return are not visible in this excerpt.
608 int GetGroupsCommand::readGroup(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
610 string thisOutputDir = outputDir;
611 if (outputDir == "") { thisOutputDir += m->hasPath(groupfile); }
612 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "pick" + m->getExtension(groupfile);
615 m->openOutputFile(outputFileName, out);
618 m->openInputFile(groupfile, in);
621 bool wroteSomething = false;
622 int selectedCount = 0;
// On interrupt: close both streams, delete the partial output, and stop.
625 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
627 in >> name; //read from first column
628 in >> group; //read from second column
630 //if this sequence is in names (filled by fillNames() from the selected groups)
631 if (names.count(name) != 0) {
632 wroteSomething = true;
633 out << name << '\t' << group << endl;
// Warn when nothing matched; the output file is registered either way.
642 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
643 outputTypes["group"].push_back(outputFileName); outputNames.push_back(outputFileName);
645 m->mothurOut("Selected " + toString(selectedCount) + " sequences from your group file."); m->mothurOutEndLine();
// Any std::exception raised above is reported through m->errorOut.
649 catch(exception& e) {
650 m->errorOut(e, "GetGroupsCommand", "readGroup");
654 //**********************************************************************************************************************
// Copies to a "pick" taxonomy file every name/taxonomy row of taxfile whose sequence
// name is in `names`. No "Selected N sequences" summary line is visible for this file
// type in this excerpt, unlike the fasta, list, name and group readers.
// NOTE(review): the embedded numbering skips lines here (e.g. 660, 662-663, 665-666,
// 668-669, 679-685, 688-690), so the stream and variable declarations, the read loop
// header, the stream closes and the return are not visible in this excerpt.
655 int GetGroupsCommand::readTax(){
// Output goes to outputDir, or next to the input file when outputDir is empty.
657 string thisOutputDir = outputDir;
658 if (outputDir == "") { thisOutputDir += m->hasPath(taxfile); }
659 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(taxfile)) + "pick" + m->getExtension(taxfile);
661 m->openOutputFile(outputFileName, out);
664 m->openInputFile(taxfile, in);
667 bool wroteSomething = false;
// On interrupt: close both streams, delete the partial output, and stop.
670 if (m->control_pressed) { in.close(); out.close(); m->mothurRemove(outputFileName); return 0; }
672 in >> name; //read from first column
673 in >> tax; //read from second column
675 //if this sequence is in names (filled by fillNames() from the selected groups)
676 if (names.count(name) != 0) {
677 wroteSomething = true;
678 out << name << '\t' << tax << endl;
// Warn when nothing matched; the output file is registered either way.
686 if (wroteSomething == false) { m->mothurOut("Your file does NOT contain sequences from the groups you wish to get."); m->mothurOutEndLine(); }
687 outputTypes["taxonomy"].push_back(outputFileName); outputNames.push_back(outputFileName);
// Any std::exception raised above is reported through m->errorOut.
691 catch(exception& e) {
692 m->errorOut(e, "GetGroupsCommand", "readTax");
696 //**********************************************************************************************************************
// Opens accnosfile and appends the names read from it to Groups.
// NOTE(review): the embedded numbering skips lines here (698-701, 703-707, 709-716), so
// the stream declaration, the read loop and the stream close are not visible in this
// excerpt; only the open and the push_back can be confirmed from here.
697 void GetGroupsCommand::readAccnos(){
702 m->openInputFile(accnosfile, in);
// Each name read becomes one entry of the group selection.
708 Groups.push_back(name);
// Any std::exception raised above is reported through m->errorOut.
717 catch(exception& e) {
718 m->errorOut(e, "GetGroupsCommand", "readAccnos");
722 //**********************************************************************************************************************
// Fills `names` with every sequence from groupMap whose group passes
// m->inUsersGroups(group, Groups); the file readers test membership in `names`.
// NOTE(review): the embedded numbering skips lines here (724, 726, 728, 730, 732,
// 735-739), so the final return and the closing braces are not visible in this excerpt.
723 int GetGroupsCommand::fillNames(){
// All sequence names known to the group map.
725 vector<string> seqs = groupMap->getNamesSeqs();
727 for (int i = 0; i < seqs.size(); i++) {
// Stop early on interrupt.
729 if (m->control_pressed) { return 0; }
731 string group = groupMap->getGroup(seqs[i]);
// Keep the sequence when its group is one of the selected groups.
733 if (m->inUsersGroups(group, Groups)) {
734 names.insert(seqs[i]);
// Any std::exception raised above is reported through m->errorOut.
740 catch(exception& e) {
741 m->errorOut(e, "GetGroupsCommand", "fillNames");
746 //**********************************************************************************************************************