2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){
15 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){
28 //initialize outputTypes
29 vector<string> tempOutNames;
30 outputTypes["list"] = tempOutNames;
31 outputTypes["name"] = tempOutNames;
32 outputTypes["accnos"] = tempOutNames;
33 outputTypes["group"] = tempOutNames;
34 outputTypes["fasta"] = tempOutNames;
37 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
41 //**********************************************************************************************************************
42 vector<string> SplitAbundCommand::getRequiredParameters(){
44 string Array[] = {"fasta","list","name","or"};
45 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
49 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
53 //**********************************************************************************************************************
54 vector<string> SplitAbundCommand::getRequiredFiles(){
56 vector<string> myArray;
60 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
64 //**********************************************************************************************************************
65 SplitAbundCommand::SplitAbundCommand(string option) {
70 //allow user to run help
71 if(option == "help") { help(); abort = true; }
74 //valid paramters for this command
75 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
76 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
78 OptionParser parser(option);
79 map<string, string> parameters = parser.getParameters();
81 ValidParameters validParameter;
82 map<string, string>::iterator it;
84 //check to make sure all parameters are valid for command
85 for (it = parameters.begin(); it != parameters.end(); it++) {
86 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
89 //initialize outputTypes
90 vector<string> tempOutNames;
91 outputTypes["list"] = tempOutNames;
92 outputTypes["name"] = tempOutNames;
93 outputTypes["accnos"] = tempOutNames;
94 outputTypes["group"] = tempOutNames;
95 outputTypes["fasta"] = tempOutNames;
97 //if the user changes the input directory command factory will send this info to us in the output parameter
98 string inputDir = validParameter.validFile(parameters, "inputdir", false);
99 if (inputDir == "not found"){ inputDir = ""; }
102 it = parameters.find("list");
103 //user has given a template file
104 if(it != parameters.end()){
105 path = m->hasPath(it->second);
106 //if the user has not given a path then, add inputdir. else leave path alone.
107 if (path == "") { parameters["list"] = inputDir + it->second; }
110 it = parameters.find("group");
111 //user has given a template file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["group"] = inputDir + it->second; }
118 it = parameters.find("fasta");
119 //user has given a template file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["fasta"] = inputDir + it->second; }
126 it = parameters.find("name");
127 //user has given a template file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["name"] = inputDir + it->second; }
137 //if the user changes the output directory command factory will send this info to us in the output parameter
138 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
140 //check for required parameters
141 listfile = validParameter.validFile(parameters, "list", true);
142 if (listfile == "not open") { abort = true; }
143 else if (listfile == "not found") { listfile = ""; }
144 else{ inputFile = listfile; }
146 namefile = validParameter.validFile(parameters, "name", true);
147 if (namefile == "not open") { abort = true; }
148 else if (namefile == "not found") { namefile = ""; }
149 else{ inputFile = namefile; }
151 fastafile = validParameter.validFile(parameters, "fasta", true);
152 if (fastafile == "not open") { abort = true; }
153 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { groupfile = ""; abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
159 groupMap = new GroupMap(groupfile);
161 int error = groupMap->readMap();
162 if (error == 1) { abort = true; }
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; }
168 else if (groups == "all") {
169 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
170 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
172 m->splitAtDash(groups, Groups);
175 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
177 //do you have all files needed
178 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
180 //check for optional parameter and set defaults
181 // ...at some point should added some additional type checking...
182 label = validParameter.validFile(parameters, "label", false);
183 if (label == "not found") { label = ""; allLines = 1; }
185 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
186 else { allLines = 1; }
189 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
190 accnos = m->isTrue(temp);
192 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
193 convert(temp, cutoff);
195 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
200 catch(exception& e) {
201 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
205 //**********************************************************************************************************************
206 void SplitAbundCommand::help(){
208 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
209 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
210 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
211 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
212 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
213 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
214 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
215 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
216 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
217 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
218 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
219 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
220 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
223 catch(exception& e) {
224 m->errorOut(e, "SplitAbundCommand", "help");
228 //**********************************************************************************************************************
229 SplitAbundCommand::~SplitAbundCommand(){
230 if (groupfile != "") { delete groupMap; }
232 //**********************************************************************************************************************
233 int SplitAbundCommand::execute(){
236 if (abort == true) { return 0; }
238 if (listfile != "") { //you are using a listfile to determine abundance
239 if (outputDir == "") { outputDir = m->hasPath(listfile); }
241 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
242 set<string> processedLabels;
243 set<string> userLabels = labels;
245 input = new InputData(listfile, "list");
246 list = input->getListVector();
247 string lastLabel = list->getLabel();
249 //do you have a namefile or do we need to similate one?
250 if (namefile != "") { readNamesFile(); }
251 else { createNameMap(list); }
253 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
255 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
257 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
259 if(allLines == 1 || labels.count(list->getLabel()) == 1){
261 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
264 processedLabels.insert(list->getLabel());
265 userLabels.erase(list->getLabel());
268 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
269 string saveLabel = list->getLabel();
272 list = input->getListVector(lastLabel); //get new list vector to process
274 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
277 processedLabels.insert(list->getLabel());
278 userLabels.erase(list->getLabel());
280 //restore real lastlabel to save below
281 list->setLabel(saveLabel);
285 lastLabel = list->getLabel();
288 list = input->getListVector(); //get new list vector to process
291 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
293 //output error messages about any remaining user labels
294 set<string>::iterator it;
295 bool needToRun = false;
296 for (it = userLabels.begin(); it != userLabels.end(); it++) {
297 m->mothurOut("Your file does not include the label " + *it);
298 if (processedLabels.count(lastLabel) != 1) {
299 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
302 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
307 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
309 //run last label if you need to
310 if (needToRun == true) {
311 if (list != NULL) { delete list; }
312 list = input->getListVector(lastLabel); //get new list vector to process
314 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
322 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
324 }else { //you are using the namefile to determine abundance
325 if (outputDir == "") { outputDir = m->hasPath(namefile); }
331 if (groupfile != "") { parseGroup(tag); }
332 if (accnos) { writeAccnos(tag); }
333 if (fastafile != "") { parseFasta(tag); }
336 m->mothurOutEndLine();
337 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
338 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
339 m->mothurOutEndLine();
343 catch(exception& e) {
344 m->errorOut(e, "SplitAbundCommand", "execute");
348 /**********************************************************************************************************************/
349 int SplitAbundCommand::splitList(ListVector* thisList) {
354 //get rareNames and abundNames
355 for (int i = 0; i < thisList->getNumBins(); i++) {
356 if (m->control_pressed) { return 0; }
358 string bin = thisList->get(i);
360 vector<string> names;
361 m->splitAtComma(bin, names); //parses bin into individual sequence names
362 int size = names.size();
364 if (size <= cutoff) {
365 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
367 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
372 string tag = thisList->getLabel() + ".";
374 writeList(thisList, tag);
376 if (groupfile != "") { parseGroup(tag); }
377 if (accnos) { writeAccnos(tag); }
378 if (fastafile != "") { parseFasta(tag); }
383 catch(exception& e) {
384 m->errorOut(e, "SplitAbundCommand", "splitList");
388 /**********************************************************************************************************************/
389 int SplitAbundCommand::writeList(ListVector* thisList, string tag) {
392 map<string, ofstream*> filehandles;
394 if (Groups.size() == 0) {
395 SAbundVector* sabund = new SAbundVector();
396 *sabund = thisList->getSAbundVector();
398 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
399 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
401 for (int i = 0; i <= sabund->getMaxRank(); i++) {
402 if (i > cutoff) { break; }
403 numRareBins += sabund->get(i);
405 int numAbundBins = thisList->getNumBins() - numRareBins;
411 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
412 m->openOutputFile(rare, rout);
413 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
415 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
416 m->openOutputFile(abund, aout);
417 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
419 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
420 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
422 for (int i = 0; i < thisList->getNumBins(); i++) {
423 if (m->control_pressed) { break; }
425 string bin = list->get(i);
427 int size = m->getNumNames(bin);
429 if (size <= cutoff) { rout << bin << '\t'; }
430 else { aout << bin << '\t'; }
433 if (rareNames.size() != 0) { rout << endl; }
434 if (abundNames.size() != 0) { aout << endl; }
439 }else{ //parse names by abundance and group
440 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
443 //map<string, bool> wroteFile;
444 map<string, ofstream*> filehandles;
445 map<string, ofstream*>::iterator it3;
447 for (int i=0; i<Groups.size(); i++) {
449 filehandles[Groups[i]+".rare"] = temp;
450 temp2 = new ofstream;
451 filehandles[Groups[i]+".abund"] = temp2;
453 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
454 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
455 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
456 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
459 map<string, string> groupVector;
460 map<string, string>::iterator itGroup;
461 map<string, int> groupNumBins;
463 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
464 groupNumBins[it3->first] = 0;
465 groupVector[it3->first] = "";
468 for (int i = 0; i < thisList->getNumBins(); i++) {
469 if (m->control_pressed) { break; }
471 map<string, string> groupBins;
472 string bin = list->get(i);
474 vector<string> names;
475 m->splitAtComma(bin, names); //parses bin into individual sequence names
477 //parse bin into list of sequences in each group
478 for (int j = 0; j < names.size(); j++) {
480 if (rareNames.count(names[j]) != 0) { //you are a rare name
482 }else{ //you are a abund name
483 rareAbund = ".abund";
486 string group = groupMap->getGroup(names[j]);
488 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
489 itGroup = groupBins.find(group+rareAbund);
490 if(itGroup == groupBins.end()) {
491 groupBins[group+rareAbund] = names[j]; //add first name
492 groupNumBins[group+rareAbund]++;
493 }else{ //add another name
494 groupBins[group+rareAbund] += "," + names[j];
496 }else if(group == "not found") {
497 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
502 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
503 groupVector[itGroup->first] += itGroup->second + '\t';
508 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
509 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
510 (*(filehandles[it3->first])).close();
518 catch(exception& e) {
519 m->errorOut(e, "SplitAbundCommand", "writeList");
523 /**********************************************************************************************************************/
524 int SplitAbundCommand::splitNames() { //namefile
532 m->openInputFile(namefile, in);
535 if (m->control_pressed) { break; }
537 string firstCol, secondCol;
538 in >> firstCol >> secondCol; m->gobble(in);
540 nameMap[firstCol] = secondCol;
542 int size = m->getNumNames(secondCol);
544 if (size <= cutoff) {
545 rareNames.insert(firstCol);
547 abundNames.insert(firstCol);
555 catch(exception& e) {
556 m->errorOut(e, "SplitAbundCommand", "splitNames");
560 /**********************************************************************************************************************/
561 int SplitAbundCommand::readNamesFile() {
565 m->openInputFile(namefile, in);
568 if (m->control_pressed) { break; }
570 string firstCol, secondCol;
571 in >> firstCol >> secondCol; m->gobble(in);
573 nameMap[firstCol] = secondCol;
580 catch(exception& e) {
581 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
585 /**********************************************************************************************************************/
586 int SplitAbundCommand::createNameMap(ListVector* thisList) {
589 if (thisList != NULL) {
590 for (int i = 0; i < thisList->getNumBins(); i++) {
591 if (m->control_pressed) { return 0; }
593 string bin = thisList->get(i);
595 vector<string> names;
596 m->splitAtComma(bin, names); //parses bin into individual sequence names
598 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
604 catch(exception& e) {
605 m->errorOut(e, "SplitAbundCommand", "createNameMap");
609 /**********************************************************************************************************************/
610 int SplitAbundCommand::writeNames() { //namefile
613 map<string, ofstream*> filehandles;
615 if (Groups.size() == 0) {
619 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
620 m->openOutputFile(rare, rout);
621 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
623 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
624 m->openOutputFile(abund, aout);
625 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
627 if (rareNames.size() != 0) {
628 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
629 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
634 if (abundNames.size() != 0) {
635 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
636 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
641 }else{ //parse names by abundance and group
642 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
645 map<string, ofstream*> filehandles;
646 map<string, ofstream*>::iterator it3;
648 for (int i=0; i<Groups.size(); i++) {
650 filehandles[Groups[i]+".rare"] = temp;
651 temp2 = new ofstream;
652 filehandles[Groups[i]+".abund"] = temp2;
654 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
655 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
658 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
659 vector<string> names;
660 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
663 if (rareNames.count(itName->first) != 0) { //you are a rare name
665 }else{ //you are a abund name
666 rareAbund = ".abund";
669 map<string, string> outputStrings;
670 map<string, string>::iterator itout;
671 for (int i = 0; i < names.size(); i++) {
673 string group = groupMap->getGroup(names[i]);
675 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
676 itout = outputStrings.find(group+rareAbund);
677 if (itout == outputStrings.end()) {
678 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
679 }else { outputStrings[group+rareAbund] += "," + names[i]; }
680 }else if(group == "not found") {
681 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
685 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
689 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
690 (*(filehandles[it3->first])).close();
691 outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names");
699 catch(exception& e) {
700 m->errorOut(e, "SplitAbundCommand", "writeNames");
704 /**********************************************************************************************************************/
705 //just write the unique names - if a namesfile is given
706 int SplitAbundCommand::writeAccnos(string tag) {
709 map<string, ofstream*> filehandles;
711 if (Groups.size() == 0) {
716 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
717 m->openOutputFile(rare, rout);
718 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
720 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
721 rout << (*itRare) << endl;
725 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
726 m->openOutputFile(abund, aout);
727 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
729 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
730 aout << (*itAbund) << endl;
734 }else{ //parse names by abundance and group
735 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
738 map<string, ofstream*> filehandles;
739 map<string, ofstream*>::iterator it3;
741 for (int i=0; i<Groups.size(); i++) {
743 filehandles[Groups[i]+".rare"] = temp;
744 temp2 = new ofstream;
745 filehandles[Groups[i]+".abund"] = temp2;
747 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
748 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
752 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
753 string group = groupMap->getGroup(*itRare);
755 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
756 *(filehandles[group+".rare"]) << *itRare << endl;
761 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
762 string group = groupMap->getGroup(*itAbund);
764 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
765 *(filehandles[group+".abund"]) << *itAbund << endl;
770 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
771 (*(filehandles[it3->first])).close();
772 outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
780 catch(exception& e) {
781 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
785 /**********************************************************************************************************************/
786 int SplitAbundCommand::parseGroup(string tag) { //namefile
789 map<string, ofstream*> filehandles;
791 if (Groups.size() == 0) {
795 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
796 m->openOutputFile(rare, rout);
797 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
799 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
800 m->openOutputFile(abund, aout);
801 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
803 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
804 vector<string> names;
805 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
807 for (int i = 0; i < names.size(); i++) {
809 string group = groupMap->getGroup(names[i]);
811 if (group == "not found") {
812 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
814 if (rareNames.count(itName->first) != 0) { //you are a rare name
815 rout << names[i] << '\t' << group << endl;
816 }else{ //you are a abund name
817 aout << names[i] << '\t' << group << endl;
826 }else{ //parse names by abundance and group
827 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
830 map<string, ofstream*> filehandles;
831 map<string, ofstream*>::iterator it3;
833 for (int i=0; i<Groups.size(); i++) {
835 filehandles[Groups[i]+".rare"] = temp;
836 temp2 = new ofstream;
837 filehandles[Groups[i]+".abund"] = temp2;
839 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
840 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
843 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
844 vector<string> names;
845 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
848 if (rareNames.count(itName->first) != 0) { //you are a rare name
850 }else{ //you are a abund name
851 rareAbund = ".abund";
854 for (int i = 0; i < names.size(); i++) {
856 string group = groupMap->getGroup(names[i]);
858 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
859 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
864 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
865 (*(filehandles[it3->first])).close();
866 outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
874 catch(exception& e) {
875 m->errorOut(e, "SplitAbundCommand", "parseGroups");
879 /**********************************************************************************************************************/
880 int SplitAbundCommand::parseFasta(string tag) { //namefile
883 map<string, ofstream*> filehandles;
885 if (Groups.size() == 0) {
889 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
890 m->openOutputFile(rare, rout);
891 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
893 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
894 m->openOutputFile(abund, aout);
895 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
899 m->openInputFile(fastafile, in);
902 if (m->control_pressed) { break; }
904 Sequence seq(in); m->gobble(in);
906 if (seq.getName() != "") {
908 map<string, string>::iterator itNames;
910 itNames = nameMap.find(seq.getName());
912 if (itNames == nameMap.end()) {
913 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
915 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
916 seq.printSequence(rout);
917 }else{ //you are a abund name
918 seq.printSequence(aout);
927 }else{ //parse names by abundance and group
928 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
931 map<string, ofstream*> filehandles;
932 map<string, ofstream*>::iterator it3;
934 for (int i=0; i<Groups.size(); i++) {
936 filehandles[Groups[i]+".rare"] = temp;
937 temp2 = new ofstream;
938 filehandles[Groups[i]+".abund"] = temp2;
940 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
941 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
946 m->openInputFile(fastafile, in);
949 if (m->control_pressed) { break; }
951 Sequence seq(in); m->gobble(in);
953 if (seq.getName() != "") {
954 map<string, string>::iterator itNames = nameMap.find(seq.getName());
956 if (itNames == nameMap.end()) {
957 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
959 vector<string> names;
960 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
963 if (rareNames.count(itNames->first) != 0) { //you are a rare name
965 }else{ //you are a abund name
966 rareAbund = ".abund";
969 for (int i = 0; i < names.size(); i++) {
971 string group = groupMap->getGroup(seq.getName());
973 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
974 seq.printSequence(*(filehandles[group+rareAbund]));
975 }else if(group == "not found") {
976 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
984 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
985 (*(filehandles[it3->first])).close();
986 outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
994 catch(exception& e) {
995 m->errorOut(e, "SplitAbundCommand", "parseFasta");
999 /**********************************************************************************************************************/