2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::setParameters(){
15 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "SplitAbundCommand", "setParameters");
35 //**********************************************************************************************************************
36 string SplitAbundCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
40 helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
41 helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
42 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
43 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
44 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
45 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
46 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n";
47 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n";
48 helpString += "If you want .abund and .rare files for all groups, set groups=all. \n";
49 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
50 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
51 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
55 m->errorOut(e, "SplitAbundCommand", "getHelpString");
59 //**********************************************************************************************************************
60 string SplitAbundCommand::getOutputFileNameTag(string type, string inputName=""){
62 string outputFileName = "";
63 map<string, vector<string> >::iterator it;
65 //is this a type this command creates
66 it = outputTypes.find(type);
67 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
69 if (type == "fasta") { outputFileName = "fasta"; }
70 else if (type == "list") { outputFileName = "list"; }
71 else if (type == "name") { outputFileName = "names"; }
72 else if (type == "group") { outputFileName = "groups"; }
73 else if (type == "accnos") { outputFileName = "accnos"; }
74 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
76 return outputFileName;
79 m->errorOut(e, "SplitAbundCommand", "getOutputFileNameTag");
83 //**********************************************************************************************************************
84 SplitAbundCommand::SplitAbundCommand(){
86 abort = true; calledHelp = true;
88 vector<string> tempOutNames;
89 outputTypes["list"] = tempOutNames;
90 outputTypes["name"] = tempOutNames;
91 outputTypes["accnos"] = tempOutNames;
92 outputTypes["group"] = tempOutNames;
93 outputTypes["fasta"] = tempOutNames;
96 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option) {
103 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
113 map<string, string> parameters = parser.getParameters();
115 ValidParameters validParameter;
116 map<string, string>::iterator it;
118 //check to make sure all parameters are valid for command
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["list"] = tempOutNames;
126 outputTypes["name"] = tempOutNames;
127 outputTypes["accnos"] = tempOutNames;
128 outputTypes["group"] = tempOutNames;
129 outputTypes["fasta"] = tempOutNames;
131 //if the user changes the input directory command factory will send this info to us in the output parameter
132 string inputDir = validParameter.validFile(parameters, "inputdir", false);
133 if (inputDir == "not found"){ inputDir = ""; }
136 it = parameters.find("list");
137 //user has given a template file
138 if(it != parameters.end()){
139 path = m->hasPath(it->second);
140 //if the user has not given a path then, add inputdir. else leave path alone.
141 if (path == "") { parameters["list"] = inputDir + it->second; }
144 it = parameters.find("group");
145 //user has given a template file
146 if(it != parameters.end()){
147 path = m->hasPath(it->second);
148 //if the user has not given a path then, add inputdir. else leave path alone.
149 if (path == "") { parameters["group"] = inputDir + it->second; }
152 it = parameters.find("fasta");
153 //user has given a template file
154 if(it != parameters.end()){
155 path = m->hasPath(it->second);
156 //if the user has not given a path then, add inputdir. else leave path alone.
157 if (path == "") { parameters["fasta"] = inputDir + it->second; }
160 it = parameters.find("name");
161 //user has given a template file
162 if(it != parameters.end()){
163 path = m->hasPath(it->second);
164 //if the user has not given a path then, add inputdir. else leave path alone.
165 if (path == "") { parameters["name"] = inputDir + it->second; }
171 //if the user changes the output directory command factory will send this info to us in the output parameter
172 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
174 //check for required parameters
175 listfile = validParameter.validFile(parameters, "list", true);
176 if (listfile == "not open") { abort = true; }
177 else if (listfile == "not found") { listfile = ""; }
178 else{ inputFile = listfile; m->setListFile(listfile); }
180 namefile = validParameter.validFile(parameters, "name", true);
181 if (namefile == "not open") { abort = true; }
182 else if (namefile == "not found") { namefile = ""; }
183 else{ inputFile = namefile; m->setNameFile(namefile); }
185 fastafile = validParameter.validFile(parameters, "fasta", true);
186 if (fastafile == "not open") { abort = true; }
187 else if (fastafile == "not found") {
188 fastafile = m->getFastaFile();
189 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191 }else { m->setFastaFile(fastafile); }
193 groupfile = validParameter.validFile(parameters, "group", true);
194 if (groupfile == "not open") { groupfile = ""; abort = true; }
195 else if (groupfile == "not found") { groupfile = ""; }
197 groupMap = new GroupMap(groupfile);
199 int error = groupMap->readMap();
200 if (error == 1) { abort = true; }
201 m->setGroupFile(groupfile);
204 groups = validParameter.validFile(parameters, "groups", false);
205 if (groups == "not found") { groups = ""; }
206 else if (groups == "all") {
207 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
208 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
210 m->splitAtDash(groups, Groups);
213 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
215 //do you have all files needed
216 if ((listfile == "") && (namefile == "")) {
217 namefile = m->getNameFile();
218 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
220 listfile = m->getListFile();
221 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
222 else { m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
226 //check for optional parameter and set defaults
227 // ...at some point should added some additional type checking...
228 label = validParameter.validFile(parameters, "label", false);
229 if (label == "not found") { label = ""; allLines = 1; }
231 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
232 else { allLines = 1; }
235 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
236 accnos = m->isTrue(temp);
238 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
239 m->mothurConvert(temp, cutoff);
241 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
245 catch(exception& e) {
246 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
250 //**********************************************************************************************************************
251 SplitAbundCommand::~SplitAbundCommand(){
252 if (groupfile != "") { delete groupMap; }
254 //**********************************************************************************************************************
255 int SplitAbundCommand::execute(){
258 if (abort == true) { if (calledHelp) { return 0; } return 2; }
260 if (listfile != "") { //you are using a listfile to determine abundance
261 if (outputDir == "") { outputDir = m->hasPath(listfile); }
263 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
264 set<string> processedLabels;
265 set<string> userLabels = labels;
267 input = new InputData(listfile, "list");
268 list = input->getListVector();
269 string lastLabel = list->getLabel();
271 //do you have a namefile or do we need to similate one?
272 if (namefile != "") { readNamesFile(); }
273 else { createNameMap(list); }
275 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
277 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
279 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
281 if(allLines == 1 || labels.count(list->getLabel()) == 1){
283 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
286 processedLabels.insert(list->getLabel());
287 userLabels.erase(list->getLabel());
290 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
291 string saveLabel = list->getLabel();
294 list = input->getListVector(lastLabel); //get new list vector to process
296 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
299 processedLabels.insert(list->getLabel());
300 userLabels.erase(list->getLabel());
302 //restore real lastlabel to save below
303 list->setLabel(saveLabel);
307 lastLabel = list->getLabel();
310 list = input->getListVector(); //get new list vector to process
313 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
315 //output error messages about any remaining user labels
316 set<string>::iterator it;
317 bool needToRun = false;
318 for (it = userLabels.begin(); it != userLabels.end(); it++) {
319 m->mothurOut("Your file does not include the label " + *it);
320 if (processedLabels.count(lastLabel) != 1) {
321 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
324 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
329 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
331 //run last label if you need to
332 if (needToRun == true) {
333 if (list != NULL) { delete list; }
334 list = input->getListVector(lastLabel); //get new list vector to process
336 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
344 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
346 }else { //you are using the namefile to determine abundance
347 if (outputDir == "") { outputDir = m->hasPath(namefile); }
353 if (groupfile != "") { parseGroup(tag); }
354 if (accnos) { writeAccnos(tag); }
355 if (fastafile != "") { parseFasta(tag); }
358 //set fasta file as new current fastafile
360 itTypes = outputTypes.find("fasta");
361 if (itTypes != outputTypes.end()) {
362 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
365 itTypes = outputTypes.find("name");
366 if (itTypes != outputTypes.end()) {
367 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
370 itTypes = outputTypes.find("group");
371 if (itTypes != outputTypes.end()) {
372 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
375 itTypes = outputTypes.find("list");
376 if (itTypes != outputTypes.end()) {
377 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
380 itTypes = outputTypes.find("accnos");
381 if (itTypes != outputTypes.end()) {
382 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
385 m->mothurOutEndLine();
386 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
387 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
388 m->mothurOutEndLine();
392 catch(exception& e) {
393 m->errorOut(e, "SplitAbundCommand", "execute");
397 /**********************************************************************************************************************/
398 int SplitAbundCommand::splitList(ListVector* thisList) {
403 //get rareNames and abundNames
404 for (int i = 0; i < thisList->getNumBins(); i++) {
405 if (m->control_pressed) { return 0; }
407 string bin = thisList->get(i);
409 vector<string> names;
410 m->splitAtComma(bin, names); //parses bin into individual sequence names
411 int size = names.size();
413 if (size <= cutoff) {
414 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
416 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
421 string tag = thisList->getLabel() + ".";
423 writeList(thisList, tag);
425 if (groupfile != "") { parseGroup(tag); }
426 if (accnos) { writeAccnos(tag); }
427 if (fastafile != "") { parseFasta(tag); }
432 catch(exception& e) {
433 m->errorOut(e, "SplitAbundCommand", "splitList");
437 /**********************************************************************************************************************/
438 int SplitAbundCommand::writeList(ListVector* thisList, string tag) {
441 map<string, ofstream*> filehandles;
443 if (Groups.size() == 0) {
444 SAbundVector* sabund = new SAbundVector();
445 *sabund = thisList->getSAbundVector();
447 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
448 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
450 for (int i = 0; i <= sabund->getMaxRank(); i++) {
451 if (i > cutoff) { break; }
452 numRareBins += sabund->get(i);
454 int numAbundBins = thisList->getNumBins() - numRareBins;
460 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare." + getOutputFileNameTag("list");
461 m->openOutputFile(rare, rout);
462 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
464 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund." + getOutputFileNameTag("list");
465 m->openOutputFile(abund, aout);
466 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
468 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
469 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
471 for (int i = 0; i < thisList->getNumBins(); i++) {
472 if (m->control_pressed) { break; }
474 string bin = list->get(i);
476 int size = m->getNumNames(bin);
478 if (size <= cutoff) { rout << bin << '\t'; }
479 else { aout << bin << '\t'; }
482 if (rareNames.size() != 0) { rout << endl; }
483 if (abundNames.size() != 0) { aout << endl; }
488 }else{ //parse names by abundance and group
489 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
492 //map<string, bool> wroteFile;
493 map<string, ofstream*> filehandles;
494 map<string, ofstream*>::iterator it3;
496 for (int i=0; i<Groups.size(); i++) {
498 filehandles[Groups[i]+".rare"] = temp;
499 temp2 = new ofstream;
500 filehandles[Groups[i]+".abund"] = temp2;
502 string rareGroupFileName = fileroot + Groups[i] + tag + ".rare." + getOutputFileNameTag("list");
503 string abundGroupFileName = fileroot + Groups[i] + tag + ".abund." + getOutputFileNameTag("list");
504 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
505 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
506 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
507 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
510 map<string, string> groupVector;
511 map<string, string>::iterator itGroup;
512 map<string, int> groupNumBins;
514 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
515 groupNumBins[it3->first] = 0;
516 groupVector[it3->first] = "";
519 for (int i = 0; i < thisList->getNumBins(); i++) {
520 if (m->control_pressed) { break; }
522 map<string, string> groupBins;
523 string bin = list->get(i);
525 vector<string> names;
526 m->splitAtComma(bin, names); //parses bin into individual sequence names
528 //parse bin into list of sequences in each group
529 for (int j = 0; j < names.size(); j++) {
531 if (rareNames.count(names[j]) != 0) { //you are a rare name
533 }else{ //you are a abund name
534 rareAbund = ".abund";
537 string group = groupMap->getGroup(names[j]);
539 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
540 itGroup = groupBins.find(group+rareAbund);
541 if(itGroup == groupBins.end()) {
542 groupBins[group+rareAbund] = names[j]; //add first name
543 groupNumBins[group+rareAbund]++;
544 }else{ //add another name
545 groupBins[group+rareAbund] += "," + names[j];
547 }else if(group == "not found") {
548 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
553 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
554 groupVector[itGroup->first] += itGroup->second + '\t';
559 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
560 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
561 (*(filehandles[it3->first])).close();
569 catch(exception& e) {
570 m->errorOut(e, "SplitAbundCommand", "writeList");
574 /**********************************************************************************************************************/
575 int SplitAbundCommand::splitNames() { //namefile
583 m->openInputFile(namefile, in);
586 if (m->control_pressed) { break; }
588 string firstCol, secondCol;
589 in >> firstCol >> secondCol; m->gobble(in);
591 nameMap[firstCol] = secondCol;
593 int size = m->getNumNames(secondCol);
595 if (size <= cutoff) {
596 rareNames.insert(firstCol);
598 abundNames.insert(firstCol);
606 catch(exception& e) {
607 m->errorOut(e, "SplitAbundCommand", "splitNames");
611 /**********************************************************************************************************************/
612 int SplitAbundCommand::readNamesFile() {
616 m->openInputFile(namefile, in);
619 if (m->control_pressed) { break; }
621 string firstCol, secondCol;
622 in >> firstCol >> secondCol; m->gobble(in);
624 nameMap[firstCol] = secondCol;
631 catch(exception& e) {
632 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
636 /**********************************************************************************************************************/
637 int SplitAbundCommand::createNameMap(ListVector* thisList) {
640 if (thisList != NULL) {
641 for (int i = 0; i < thisList->getNumBins(); i++) {
642 if (m->control_pressed) { return 0; }
644 string bin = thisList->get(i);
646 vector<string> names;
647 m->splitAtComma(bin, names); //parses bin into individual sequence names
649 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
655 catch(exception& e) {
656 m->errorOut(e, "SplitAbundCommand", "createNameMap");
660 /**********************************************************************************************************************/
661 int SplitAbundCommand::writeNames() { //namefile
664 map<string, ofstream*> filehandles;
666 if (Groups.size() == 0) {
670 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare." + getOutputFileNameTag("name");
671 m->openOutputFile(rare, rout);
672 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
674 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund." + getOutputFileNameTag("name");
675 m->openOutputFile(abund, aout);
676 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
678 if (rareNames.size() != 0) {
679 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
680 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
685 if (abundNames.size() != 0) {
686 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
687 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
692 }else{ //parse names by abundance and group
693 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
696 map<string, ofstream*> filehandles;
697 map<string, ofstream*>::iterator it3;
699 for (int i=0; i<Groups.size(); i++) {
701 filehandles[Groups[i]+".rare"] = temp;
702 temp2 = new ofstream;
703 filehandles[Groups[i]+".abund"] = temp2;
705 string rareGroupFileName = fileroot + Groups[i] + ".rare." + getOutputFileNameTag("name");
706 string abundGroupFileName = fileroot + Groups[i] + ".abund." + getOutputFileNameTag("name");
707 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
708 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
711 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
712 vector<string> names;
713 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
716 if (rareNames.count(itName->first) != 0) { //you are a rare name
718 }else{ //you are a abund name
719 rareAbund = ".abund";
722 map<string, string> outputStrings;
723 map<string, string>::iterator itout;
724 for (int i = 0; i < names.size(); i++) {
726 string group = groupMap->getGroup(names[i]);
728 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
729 itout = outputStrings.find(group+rareAbund);
730 if (itout == outputStrings.end()) {
731 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
732 }else { outputStrings[group+rareAbund] += "," + names[i]; }
733 }else if(group == "not found") {
734 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
738 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
742 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
743 (*(filehandles[it3->first])).close();
744 outputNames.push_back(fileroot + it3->first + "." + getOutputFileNameTag("name")); outputTypes["name"].push_back(fileroot + it3->first + "." + getOutputFileNameTag("name"));
752 catch(exception& e) {
753 m->errorOut(e, "SplitAbundCommand", "writeNames");
757 /**********************************************************************************************************************/
758 //just write the unique names - if a namesfile is given
759 int SplitAbundCommand::writeAccnos(string tag) {
762 map<string, ofstream*> filehandles;
764 if (Groups.size() == 0) {
769 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare." + getOutputFileNameTag("accnos");
770 m->openOutputFile(rare, rout);
771 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
773 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
774 rout << (*itRare) << endl;
778 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund." + getOutputFileNameTag("accnos");
779 m->openOutputFile(abund, aout);
780 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
782 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
783 aout << (*itAbund) << endl;
787 }else{ //parse names by abundance and group
788 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
791 map<string, ofstream*> filehandles;
792 map<string, ofstream*>::iterator it3;
794 for (int i=0; i<Groups.size(); i++) {
796 filehandles[Groups[i]+".rare"] = temp;
797 temp2 = new ofstream;
798 filehandles[Groups[i]+".abund"] = temp2;
800 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".rare"]));
801 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".abund"]));
805 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
806 string group = groupMap->getGroup(*itRare);
808 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
809 *(filehandles[group+".rare"]) << *itRare << endl;
814 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
815 string group = groupMap->getGroup(*itAbund);
817 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
818 *(filehandles[group+".abund"]) << *itAbund << endl;
823 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
824 (*(filehandles[it3->first])).close();
825 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos")); outputTypes["accnos"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos"));
833 catch(exception& e) {
834 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
838 /**********************************************************************************************************************/
839 int SplitAbundCommand::parseGroup(string tag) { //namefile
842 map<string, ofstream*> filehandles;
844 if (Groups.size() == 0) {
848 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare." + getOutputFileNameTag("group");
849 m->openOutputFile(rare, rout);
850 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
852 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund." + getOutputFileNameTag("group");
854 m->openOutputFile(abund, aout);
855 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
857 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
858 vector<string> names;
859 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
861 for (int i = 0; i < names.size(); i++) {
863 string group = groupMap->getGroup(names[i]);
865 if (group == "not found") {
866 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
868 if (rareNames.count(itName->first) != 0) { //you are a rare name
869 rout << names[i] << '\t' << group << endl;
870 }else{ //you are a abund name
871 aout << names[i] << '\t' << group << endl;
880 }else{ //parse names by abundance and group
881 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
884 map<string, ofstream*> filehandles;
885 map<string, ofstream*>::iterator it3;
887 for (int i=0; i<Groups.size(); i++) {
889 filehandles[Groups[i]+".rare"] = temp;
890 temp2 = new ofstream;
891 filehandles[Groups[i]+".abund"] = temp2;
893 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".rare"]));
894 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".abund"]));
897 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
898 vector<string> names;
899 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
902 if (rareNames.count(itName->first) != 0) { //you are a rare name
904 }else{ //you are a abund name
905 rareAbund = ".abund";
908 for (int i = 0; i < names.size(); i++) {
910 string group = groupMap->getGroup(names[i]);
912 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
913 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
918 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
919 (*(filehandles[it3->first])).close();
920 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group")); outputTypes["group"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group"));
928 catch(exception& e) {
929 m->errorOut(e, "SplitAbundCommand", "parseGroups");
933 /**********************************************************************************************************************/
934 int SplitAbundCommand::parseFasta(string tag) { //namefile
937 map<string, ofstream*> filehandles;
939 if (Groups.size() == 0) {
943 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare." + getOutputFileNameTag("fasta");
944 m->openOutputFile(rare, rout);
945 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
947 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund." + getOutputFileNameTag("fasta");
948 m->openOutputFile(abund, aout);
949 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
953 m->openInputFile(fastafile, in);
956 if (m->control_pressed) { break; }
958 Sequence seq(in); m->gobble(in);
960 if (seq.getName() != "") {
962 map<string, string>::iterator itNames;
964 itNames = nameMap.find(seq.getName());
966 if (itNames == nameMap.end()) {
967 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
969 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
970 seq.printSequence(rout);
971 }else{ //you are a abund name
972 seq.printSequence(aout);
981 }else{ //parse names by abundance and group
982 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
985 map<string, ofstream*> filehandles;
986 map<string, ofstream*>::iterator it3;
988 for (int i=0; i<Groups.size(); i++) {
990 filehandles[Groups[i]+".rare"] = temp;
991 temp2 = new ofstream;
992 filehandles[Groups[i]+".abund"] = temp2;
994 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".rare"]));
995 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".abund"]));
1000 m->openInputFile(fastafile, in);
1003 if (m->control_pressed) { break; }
1005 Sequence seq(in); m->gobble(in);
1007 if (seq.getName() != "") {
1008 map<string, string>::iterator itNames = nameMap.find(seq.getName());
1010 if (itNames == nameMap.end()) {
1011 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
1013 vector<string> names;
1014 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
1017 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1018 rareAbund = ".rare";
1019 }else{ //you are a abund name
1020 rareAbund = ".abund";
1023 for (int i = 0; i < names.size(); i++) {
1025 string group = groupMap->getGroup(seq.getName());
1027 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1028 seq.printSequence(*(filehandles[group+rareAbund]));
1029 }else if(group == "not found") {
1030 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1038 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1039 (*(filehandles[it3->first])).close();
1040 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta")); outputTypes["fasta"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta"));
1048 catch(exception& e) {
1049 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1053 /**********************************************************************************************************************/