2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
// Constructor: parses the option string for the split.abund command.
// "help" prints the usage text and sets abort. Otherwise every supplied
// parameter is checked against the accepted names, inputdir is prepended to
// the list/group/fasta/name values that carry no path, and the list, name,
// fasta, group, groups, label, accnos and cutoff settings are read.
// abort is set when a parameter is invalid, a file cannot be opened, fasta is
// missing, neither list nor name is given, the group map fails to read, or
// cutoff is 0.
// NOTE(review): the embedded original line numbers skip (e.g. 13 -> 18), so
// some lines (probably try / else / closing braces) are not visible here.
13 SplitAbundCommand::SplitAbundCommand(string option) {
18 //allow user to run help
19 if(option == "help") { help(); abort = true; }
22 //valid parameters for this command
23 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26 OptionParser parser(option);
27 map<string, string> parameters = parser.getParameters();
29 ValidParameters validParameter;
30 map<string, string>::iterator it;
32 //check to make sure all parameters are valid for command
33 for (it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the input directory command factory will send this info to us in the output parameter
38 string inputDir = validParameter.validFile(parameters, "inputdir", false);
39 if (inputDir == "not found"){ inputDir = ""; }
// NOTE(review): `path` is assigned below but its declaration is not visible in this view.
42 it = parameters.find("list");
43 //user has supplied a list file
44 if(it != parameters.end()){
45 path = m->hasPath(it->second);
46 //if the user has not given a path then, add inputdir. else leave path alone.
47 if (path == "") { parameters["list"] = inputDir + it->second; }
50 it = parameters.find("group");
51 //user has supplied a group file
52 if(it != parameters.end()){
53 path = m->hasPath(it->second);
54 //if the user has not given a path then, add inputdir. else leave path alone.
55 if (path == "") { parameters["group"] = inputDir + it->second; }
58 it = parameters.find("fasta");
59 //user has supplied a fasta file
60 if(it != parameters.end()){
61 path = m->hasPath(it->second);
62 //if the user has not given a path then, add inputdir. else leave path alone.
63 if (path == "") { parameters["fasta"] = inputDir + it->second; }
66 it = parameters.find("name");
67 //user has supplied a names file
68 if(it != parameters.end()){
69 path = m->hasPath(it->second);
70 //if the user has not given a path then, add inputdir. else leave path alone.
71 if (path == "") { parameters["name"] = inputDir + it->second; }
77 //if the user changes the output directory command factory will send this info to us in the output parameter
78 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
80 //check for required parameters
// list and name are each optional on their own; inputFile remembers whichever
// was supplied (name wins when both are given because it is assigned last).
81 listfile = validParameter.validFile(parameters, "list", true);
82 if (listfile == "not open") { abort = true; }
83 else if (listfile == "not found") { listfile = ""; }
84 else{ inputFile = listfile; }
86 namefile = validParameter.validFile(parameters, "name", true);
87 if (namefile == "not open") { abort = true; }
88 else if (namefile == "not found") { namefile = ""; }
89 else{ inputFile = namefile; }
// fasta is mandatory: a missing fasta file reports an error and aborts.
91 fastafile = validParameter.validFile(parameters, "fasta", true);
92 if (fastafile == "not open") { abort = true; }
93 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
95 groupfile = validParameter.validFile(parameters, "group", true);
96 if (groupfile == "not open") { groupfile = ""; abort = true; }
97 else if (groupfile == "not found") { groupfile = ""; }
// NOTE(review): original line 98 is not visible; presumably an else guards the
// GroupMap construction so it only happens when a group file was supplied - confirm.
99 groupMap = new GroupMap(groupfile);
101 int error = groupMap->readMap();
102 if (error == 1) { abort = true; }
// groups: "all" selects every group named in the group map; the dash-separated
// form is split into Groups. Either form is discarded when no group file is set.
106 groups = validParameter.validFile(parameters, "groups", false);
107 if (groups == "not found") { groups = ""; }
108 else if (groups == "all") {
109 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
110 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
112 m->splitAtDash(groups, Groups);
115 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
117 //do you have all files needed
// NOTE(review): the message below reads "You must either a listfile ..." - a verb
// such as "provide" appears to be missing from the user-facing text.
118 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
120 //check for optional parameter and set defaults
121 // ...at some point should added some additional type checking...
122 label = validParameter.validFile(parameters, "label", false);
123 if (label == "not found") { label = ""; allLines = 1; }
// NOTE(review): original line 124 is not visible; presumably an else opens here,
// otherwise an unset label would fall into the branch below and reset allLines to 0.
125 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
126 else { allLines = 1; }
// accnos defaults to "F" and is converted to a bool through m->isTrue.
129 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
130 accnos = m->isTrue(temp);
// cutoff defaults to "0", and 0 is rejected just below, so the user must supply one.
132 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
133 convert(temp, cutoff);
135 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
140 catch(exception& e) {
141 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
145 //**********************************************************************************************************************
// Prints the usage text of the split.abund command, one m->mothurOut call per
// sentence. Exceptions are reported through m->errorOut with the "help" label.
// NOTE(review): the embedded original line numbers skip (146 -> 148, 160 -> 163),
// so the try line and closing braces are not visible in this view.
146 void SplitAbundCommand::help(){
// NOTE(review): the first sentence reads "a list or a names file splits the
// sequences" - an "and" before "splits" appears to be missing.
148 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
// NOTE(review): "cutoff" is listed twice in the parameter list below.
149 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
150 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
156 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
157 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
// Usage template followed by a concrete example.
158 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
163 catch(exception& e) {
164 m->errorOut(e, "SplitAbundCommand", "help");
168 //**********************************************************************************************************************
// Destructor: releases groupMap, but only when groupfile is non-empty.
// NOTE(review): the closing brace (original line 171) is not visible in this view.
169 SplitAbundCommand::~SplitAbundCommand(){
170 if (groupfile != "") { delete groupMap; }
172 //**********************************************************************************************************************
// Runs the split.abund command. Returns 0 immediately when abort is set.
// With a list file: deletes stale rare/abund list outputs, records their names
// in outputNames, reads list vectors through InputData and walks the labels
// the user asked for (or all labels when allLines is 1).
// Without a list file: works from the names file, then calls parseGroup,
// writeAccnos and parseFasta depending on groupfile, accnos and fastafile.
// Finishes by printing every entry of outputNames.
// Whenever m->control_pressed is seen, the files in outputNames are removed
// and 0 is returned.
// NOTE(review): the embedded original line numbers skip throughout, so several
// statements (try, else, closing braces, some calls and deletes) are not
// visible in this view.
173 int SplitAbundCommand::execute(){
176 if (abort == true) { return 0; }
178 if (listfile != "") { //you are using a listfile to determine abundance
// default the output directory to the list file's own location
179 if (outputDir == "") { outputDir = m->hasPath(listfile); }
181 //remove old files so you can append later....
182 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
// no groups selected: one rare and one abund list file
183 if (Groups.size() == 0) {
184 remove((fileroot + "rare.list").c_str());
185 remove((fileroot + "abund.list").c_str());
187 outputNames.push_back((fileroot + "rare.list"));
188 outputNames.push_back((fileroot + "abund.list"));
// groups selected: one rare and one abund list file per group
190 for (int i=0; i<Groups.size(); i++) {
191 remove((fileroot + Groups[i] + ".rare.list").c_str());
192 remove((fileroot + Groups[i] + ".abund.list").c_str());
194 outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
195 outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
199 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// userLabels shrinks as labels are handled; processedLabels records what ran.
200 set<string> processedLabels;
201 set<string> userLabels = labels;
203 input = new InputData(listfile, "list");
204 list = input->getListVector();
205 string lastLabel = list->getLabel();
207 //do you have a namefile or do we need to simulate one?
208 if (namefile != "") { readNamesFile(); }
209 else { createNameMap(list); }
211 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// keep reading list vectors until the file is exhausted or every requested label was handled
213 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
215 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// this label is wanted: either all labels are processed or the user listed it
217 if(allLines == 1 || labels.count(list->getLabel()) == 1){
219 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
// NOTE(review): no call that processes the list is visible here (original line
// 220 is missing from this view) - presumably splitList(list); confirm.
222 processedLabels.insert(list->getLabel());
223 userLabels.erase(list->getLabel());
// a requested label is absent from the file: fall back to the previous label,
// unless that one has already been processed
226 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227 string saveLabel = list->getLabel();
// NOTE(review): no delete of the current list is visible before it is replaced
// (original lines 228-229 are missing from this view) - confirm it is freed.
230 list = input->getListVector(lastLabel); //get new list vector to process
232 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
235 processedLabels.insert(list->getLabel());
236 userLabels.erase(list->getLabel());
238 //restore real lastlabel to save below
239 list->setLabel(saveLabel);
243 lastLabel = list->getLabel();
246 list = input->getListVector(); //get new list vector to process
249 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
251 //output error messages about any remaining user labels
252 set<string>::iterator it;
253 bool needToRun = false;
254 for (it = userLabels.begin(); it != userLabels.end(); it++) {
255 m->mothurOut("Your file does not include the label " + *it);
256 if (processedLabels.count(lastLabel) != 1) {
257 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
// NOTE(review): the assignment that turns needToRun on is not visible
// (original line 258 is missing from this view).
260 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
265 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
267 //run last label if you need to
268 if (needToRun == true) {
269 if (list != NULL) { delete list; }
270 list = input->getListVector(lastLabel); //get new list vector to process
272 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
280 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
282 }else { //you are using the namefile to determine abundance
283 if (outputDir == "") { outputDir = m->hasPath(namefile); }
// NOTE(review): original lines 284-288 are not visible; `tag` used below is
// declared there, presumably next to the calls that split and write the names.
289 if (groupfile != "") { parseGroup(tag); }
290 if (accnos) { writeAccnos(tag); }
291 if (fastafile != "") { parseFasta(tag); }
// report every output file that was recorded
294 m->mothurOutEndLine();
295 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
296 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
297 m->mothurOutEndLine();
301 catch(exception& e) {
302 m->errorOut(e, "SplitAbundCommand", "execute");
306 /**********************************************************************************************************************/
// Classifies the sequence names of one list vector by bin size. Each bin of
// thisList is split at commas; names from bins with at most `cutoff` members go
// to rareNames, and the second loop below fills abundNames.
// Afterwards tag is set to the list's label plus "." and parseGroup,
// writeAccnos and parseFasta are called depending on groupfile, accnos and
// fastafile. Returns 0 when m->control_pressed is seen inside the bin loop.
// NOTE(review): original lines 308-311, 324 and 326-330 are not visible in this
// view; presumably they hold try, the else between the two insert loops, and
// further calls - confirm against the full file.
307 int SplitAbundCommand::splitList(ListVector* thisList) {
312 //get rareNames and abundNames
313 for (int i = 0; i < thisList->getNumBins(); i++) {
314 if (m->control_pressed) { return 0; }
316 string bin = thisList->get(i);
318 vector<string> names;
319 m->splitAtComma(bin, names); //parses bin into individual sequence names
320 int size = names.size();
// a bin is rare when it has no more than cutoff members
322 if (size <= cutoff) {
323 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
325 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
// the label becomes part of the per-label output file names
331 string tag = thisList->getLabel() + ".";
332 if (groupfile != "") { parseGroup(tag); }
333 if (accnos) { writeAccnos(tag); }
334 if (fastafile != "") { parseFasta(tag); }
339 catch(exception& e) {
340 m->errorOut(e, "SplitAbundCommand", "splitList");
344 /**********************************************************************************************************************/
// Appends one list vector to the rare/abund list output files.
// With no groups selected: bins with at most `cutoff` names go to
// <root>rare.list and the rest to <root>abund.list, each line starting with
// the label and the bin count.
// With groups selected: every bin is split per group and per rare/abund class
// and appended to <root><group>.rare.list / <root><group>.abund.list.
// NOTE(review): the embedded original line numbers skip throughout, so the
// declarations of numRareBins, rout, aout, temp, temp2 and rareAbund, plus
// else lines and closing braces, are not visible in this view.
345 int SplitAbundCommand::writeList(ListVector* thisList) {
348 map<string, ofstream*> filehandles;
350 if (Groups.size() == 0) {
// NOTE(review): no delete for this allocation is visible in this view - confirm it is freed.
351 SAbundVector* sabund = new SAbundVector();
352 *sabund = thisList->getSAbundVector();
354 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
355 // and don't have to store the bins until you are done with the whole vector, this saves a lot of space.
// add up sabund->get(i) for every rank i from 0 through cutoff
357 for (int i = 0; i <= sabund->getMaxRank(); i++) {
358 if (i > cutoff) { break; }
359 numRareBins += sabund->get(i);
361 int numAbundBins = thisList->getNumBins() - numRareBins;
// NOTE(review): execute() already pushes these same two file names into
// outputNames, so they may end up listed more than once - confirm.
367 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + "rare.list";
368 m->openOutputFileAppend(rare, rout);
369 outputNames.push_back(rare);
371 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + "abund.list";
372 m->openOutputFileAppend(abund, aout);
373 outputNames.push_back(abund);
// line prefix "label <tab> number of bins <tab>", written only when that class has names
375 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
376 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
378 for (int i = 0; i < thisList->getNumBins(); i++) {
379 if (m->control_pressed) { break; }
// NOTE(review): the bin is read from `list`, not from the thisList parameter that
// bounds this loop; the two only agree if the caller passes that same vector - confirm.
381 string bin = list->get(i);
383 int size = m->getNumNames(bin);
385 if (size <= cutoff) { rout << bin << '\t'; }
386 else { aout << bin << '\t'; }
389 if (rareNames.size() != 0) { rout << endl; }
390 if (abundNames.size() != 0) { aout << endl; }
395 }else{ //parse names by abundance and group
396 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
399 //map<string, bool> wroteFile;
// NOTE(review): this second filehandles declaration appears to shadow the one
// declared at the top of the function.
400 map<string, ofstream*> filehandles;
401 map<string, ofstream*>::iterator it3;
// one rare and one abund stream per group, keyed "<group>.rare" / "<group>.abund"
403 for (int i=0; i<Groups.size(); i++) {
405 filehandles[Groups[i]+".rare"] = temp;
406 temp2 = new ofstream;
407 filehandles[Groups[i]+".abund"] = temp2;
409 m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
410 m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
// per output file: the accumulated bin text and the number of bins it contains
413 map<string, string> groupVector;
414 map<string, string>::iterator itGroup;
415 map<string, int> groupNumBins;
417 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
418 groupNumBins[it3->first] = 0;
419 groupVector[it3->first] = "";
422 for (int i = 0; i < thisList->getNumBins(); i++) {
423 if (m->control_pressed) { break; }
425 map<string, string> groupBins;
// NOTE(review): as above, the bin is read from `list` rather than thisList - confirm.
426 string bin = list->get(i);
428 vector<string> names;
429 m->splitAtComma(bin, names); //parses bin into individual sequence names
431 //parse bin into list of sequences in each group
432 for (int j = 0; j < names.size(); j++) {
434 if (rareNames.count(names[j]) != 0) { //you are a rare name
436 }else{ //you are a abund name
437 rareAbund = ".abund";
440 string group = groupMap->getGroup(names[j]);
442 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
443 itGroup = groupBins.find(group+rareAbund);
// first name seen for this group/class in this bin starts a new bin and is counted
444 if(itGroup == groupBins.end()) {
445 groupBins[group+rareAbund] = names[j]; //add first name
446 groupNumBins[group+rareAbund]++;
447 }else{ //add another name
448 groupBins[group+rareAbund] += "," + names[j];
450 }else if(group == "not found") {
451 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
// append this bin's per-group pieces, tab-terminated, to the running output text
456 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
457 groupVector[itGroup->first] += itGroup->second + '\t';
// NOTE(review): the streams are closed below but no delete of the ofstream
// objects is visible in this view - confirm they are freed.
462 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
463 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
464 (*(filehandles[it3->first])).close();
472 catch(exception& e) {
473 m->errorOut(e, "SplitAbundCommand", "writeList");
477 /**********************************************************************************************************************/
// Reads the names file and classifies each record by abundance. For every
// record the first column is stored in nameMap with the second column as its
// value; the first column goes to rareNames when m->getNumNames(secondCol) is
// at most cutoff, and the insert into abundNames below handles the other case.
// NOTE(review): the stream declaration, the read loop header and the else
// between the two inserts (original lines 479-488, 500) are not visible here.
478 int SplitAbundCommand::splitNames() { //namefile
486 m->openInputFile(namefile, in);
489 if (m->control_pressed) { break; }
// one record: first column, then the second column
491 string firstCol, secondCol;
492 in >> firstCol >> secondCol; m->gobble(in);
494 nameMap[firstCol] = secondCol;
496 int size = m->getNumNames(secondCol);
498 if (size <= cutoff) {
499 rareNames.insert(firstCol);
501 abundNames.insert(firstCol);
509 catch(exception& e) {
510 m->errorOut(e, "SplitAbundCommand", "splitNames");
514 /**********************************************************************************************************************/
// Loads the names file into nameMap: the first column of each record becomes
// the key and the second column the value. No rare/abund classification is
// done here.
// NOTE(review): the stream declaration and the read loop header (original lines
// 516-521) are not visible in this view.
515 int SplitAbundCommand::readNamesFile() {
519 m->openInputFile(namefile, in);
522 if (m->control_pressed) { break; }
524 string firstCol, secondCol;
525 in >> firstCol >> secondCol; m->gobble(in);
527 nameMap[firstCol] = secondCol;
534 catch(exception& e) {
535 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
539 /**********************************************************************************************************************/
// Builds nameMap from a list vector when no names file is available: every
// sequence name found in any bin of thisList is mapped to itself. Does nothing
// to nameMap when thisList is NULL. Returns 0 as soon as m->control_pressed is
// seen.
// NOTE(review): try, closing braces and the final return (original lines
// 541-542, 553-557) are not visible in this view.
540 int SplitAbundCommand::createNameMap(ListVector* thisList) {
543 if (thisList != NULL) {
544 for (int i = 0; i < thisList->getNumBins(); i++) {
545 if (m->control_pressed) { return 0; }
547 string bin = thisList->get(i);
549 vector<string> names;
550 m->splitAtComma(bin, names); //parses bin into individual sequence names
// each name stands for itself, i.e. a names file with no duplicates
552 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
558 catch(exception& e) {
559 m->errorOut(e, "SplitAbundCommand", "createNameMap");
563 /**********************************************************************************************************************/
// Writes the rare/abund names files derived from the names file.
// With no groups selected: <root>rare.names and <root>abund.names receive one
// "name <tab> nameMap[name]" line per entry of rareNames / abundNames.
// With groups selected: each nameMap entry is split into its member names,
// which are regrouped per group and written to <root><group>.rare.names or
// <root><group>.abund.names.
// All files created here are appended to outputNames.
// NOTE(review): the declarations of rout, aout, temp, temp2 and rareAbund, plus
// else lines and closing braces, are not visible in this view (the embedded
// original line numbers skip).
564 int SplitAbundCommand::writeNames() { //namefile
567 map<string, ofstream*> filehandles;
569 if (Groups.size() == 0) {
573 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
574 m->openOutputFile(rare, rout);
575 outputNames.push_back(rare);
577 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
578 m->openOutputFile(abund, aout);
579 outputNames.push_back(abund);
581 if (rareNames.size() != 0) {
582 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
583 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
588 if (abundNames.size() != 0) {
589 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
590 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
595 }else{ //parse names by abundance and group
596 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
// NOTE(review): this second filehandles declaration appears to shadow the one
// declared at the top of the function.
599 map<string, ofstream*> filehandles;
600 map<string, ofstream*>::iterator it3;
// one rare and one abund stream per group, keyed "<group>.rare" / "<group>.abund"
602 for (int i=0; i<Groups.size(); i++) {
604 filehandles[Groups[i]+".rare"] = temp;
605 temp2 = new ofstream;
606 filehandles[Groups[i]+".abund"] = temp2;
608 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
609 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
612 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
613 vector<string> names;
614 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
// the rare/abund class is decided by the map key, not by each member name
617 if (rareNames.count(itName->first) != 0) { //you are a rare name
619 }else{ //you are a abund name
620 rareAbund = ".abund";
623 map<string, string> outputStrings;
624 map<string, string>::iterator itout;
625 for (int i = 0; i < names.size(); i++) {
627 string group = groupMap->getGroup(names[i]);
629 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
630 itout = outputStrings.find(group+rareAbund);
// the first name met for a group is written as the line's first column and as
// the first member of the second column; later names are appended after a comma
631 if (itout == outputStrings.end()) {
632 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
633 }else { outputStrings[group+rareAbund] += "," + names[i]; }
634 }else if(group == "not found") {
635 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
639 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
// NOTE(review): the streams are closed below but no delete of the ofstream
// objects is visible in this view - confirm they are freed.
643 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
644 (*(filehandles[it3->first])).close();
645 outputNames.push_back(fileroot + it3->first + ".names");
653 catch(exception& e) {
654 m->errorOut(e, "SplitAbundCommand", "writeNames");
658 /**********************************************************************************************************************/
659 //just write the unique names - if a namesfile is given
// Writes accnos files (one name per line) for the rare and abundant sets.
// tag is inserted into the file names after the root of inputFile.
// With no groups selected: <root><tag>rare.accnos and <root><tag>abund.accnos
// list the entries of rareNames and abundNames.
// With groups selected: each name goes to <root><tag><group>.rare.accnos or
// <root><tag><group>.abund.accnos, and only when its group is one of Groups.
// All files created here are appended to outputNames.
// NOTE(review): the declarations of rout, aout, temp and temp2, plus closing
// braces, are not visible in this view (the embedded original line numbers skip).
660 int SplitAbundCommand::writeAccnos(string tag) {
663 map<string, ofstream*> filehandles;
665 if (Groups.size() == 0) {
670 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
671 m->openOutputFile(rare, rout);
672 outputNames.push_back(rare);
674 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
675 rout << (*itRare) << endl;
679 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
680 m->openOutputFile(abund, aout);
681 outputNames.push_back(abund);
683 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
684 aout << (*itAbund) << endl;
688 }else{ //parse names by abundance and group
689 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
// NOTE(review): this second filehandles declaration appears to shadow the one
// declared at the top of the function.
692 map<string, ofstream*> filehandles;
693 map<string, ofstream*>::iterator it3;
// one rare and one abund stream per group, keyed "<group>.rare" / "<group>.abund"
695 for (int i=0; i<Groups.size(); i++) {
697 filehandles[Groups[i]+".rare"] = temp;
698 temp2 = new ofstream;
699 filehandles[Groups[i]+".abund"] = temp2;
701 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
702 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
// names whose group is not in Groups are skipped without a message
706 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
707 string group = groupMap->getGroup(*itRare);
709 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
710 *(filehandles[group+".rare"]) << *itRare << endl;
715 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
716 string group = groupMap->getGroup(*itAbund);
718 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
719 *(filehandles[group+".abund"]) << *itAbund << endl;
// NOTE(review): the streams are closed below but no delete of the ofstream
// objects is visible in this view - confirm they are freed.
724 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
725 (*(filehandles[it3->first])).close();
726 outputNames.push_back(fileroot + tag + it3->first + ".accnos");
734 catch(exception& e) {
735 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
739 /**********************************************************************************************************************/
// Splits the group file into rare and abundant group files. tag is inserted
// into the file names after the root of groupfile.
// With no groups selected: every member name of every nameMap entry is written
// as "name <tab> group" to <root><tag>rare.groups or <root><tag>abund.groups,
// chosen by whether the entry's key is in rareNames; names missing from the
// group map are reported.
// With groups selected: the same lines go to <root><tag><group>.rare.groups /
// <root><tag><group>.abund.groups, only for names whose group is in Groups.
// All files created here are appended to outputNames.
// NOTE(review): the declarations of rout, aout, temp, temp2 and rareAbund, plus
// else lines and closing braces, are not visible in this view (the embedded
// original line numbers skip).
740 int SplitAbundCommand::parseGroup(string tag) { //namefile
743 map<string, ofstream*> filehandles;
745 if (Groups.size() == 0) {
749 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
750 m->openOutputFile(rare, rout);
751 outputNames.push_back(rare);
753 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
754 m->openOutputFile(abund, aout);
755 outputNames.push_back(abund);
757 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
758 vector<string> names;
759 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
761 for (int i = 0; i < names.size(); i++) {
763 string group = groupMap->getGroup(names[i]);
765 if (group == "not found") {
766 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
// NOTE(review): original line 767 is not visible; presumably an else separates
// the message above from the writes below - confirm.
768 if (rareNames.count(itName->first) != 0) { //you are a rare name
769 rout << names[i] << '\t' << group << endl;
770 }else{ //you are a abund name
771 aout << names[i] << '\t' << group << endl;
780 }else{ //parse names by abundance and group
781 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
// NOTE(review): this second filehandles declaration appears to shadow the one
// declared at the top of the function.
784 map<string, ofstream*> filehandles;
785 map<string, ofstream*>::iterator it3;
// one rare and one abund stream per group, keyed "<group>.rare" / "<group>.abund"
787 for (int i=0; i<Groups.size(); i++) {
789 filehandles[Groups[i]+".rare"] = temp;
790 temp2 = new ofstream;
791 filehandles[Groups[i]+".abund"] = temp2;
793 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
794 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
797 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
798 vector<string> names;
799 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
// the rare/abund class is decided by the map key, not by each member name
802 if (rareNames.count(itName->first) != 0) { //you are a rare name
804 }else{ //you are a abund name
805 rareAbund = ".abund";
808 for (int i = 0; i < names.size(); i++) {
810 string group = groupMap->getGroup(names[i]);
812 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
813 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
// NOTE(review): the streams are closed below but no delete of the ofstream
// objects is visible in this view - confirm they are freed.
818 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
819 (*(filehandles[it3->first])).close();
820 outputNames.push_back(fileroot + tag + it3->first + ".groups");
828 catch(exception& e) {
// NOTE(review): the label passed below is "parseGroups" while the function is
// named parseGroup - the error report will carry the slightly different name.
829 m->errorOut(e, "SplitAbundCommand", "parseGroups");
833 /**********************************************************************************************************************/
// Splits the fasta file into rare and abundant fasta files. tag is inserted
// into the file names after the root of fastafile.
// With no groups selected: each sequence whose name is a key of nameMap is
// printed to <root><tag>rare.fasta when the name is in rareNames, otherwise to
// <root><tag>abund.fasta; sequences missing from nameMap are reported.
// With groups selected: sequences are printed to <root><tag><group>.rare.fasta
// or <root><tag><group>.abund.fasta according to their group.
// All files created here are appended to outputNames.
// NOTE(review): the declarations of rout, aout, in, temp, temp2 and rareAbund,
// the read loop headers, else lines and closing braces are not visible in this
// view (the embedded original line numbers skip).
834 int SplitAbundCommand::parseFasta(string tag) { //namefile
837 map<string, ofstream*> filehandles;
839 if (Groups.size() == 0) {
843 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
844 m->openOutputFile(rare, rout);
845 outputNames.push_back(rare);
847 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
848 m->openOutputFile(abund, aout);
849 outputNames.push_back(abund);
853 m->openInputFile(fastafile, in);
856 if (m->control_pressed) { break; }
858 Sequence seq(in); m->gobble(in);
// sequences that come back with an empty name are skipped
860 if (seq.getName() != "") {
862 map<string, string>::iterator itNames;
864 itNames = nameMap.find(seq.getName());
866 if (itNames == nameMap.end()) {
867 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
// NOTE(review): original line 868 is not visible; presumably an else separates
// the message above from the writes below - confirm.
869 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
870 seq.printSequence(rout);
871 }else{ //you are a abund name
872 seq.printSequence(aout);
881 }else{ //parse names by abundance and group
882 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
// NOTE(review): this second filehandles declaration appears to shadow the one
// declared at the top of the function.
885 map<string, ofstream*> filehandles;
886 map<string, ofstream*>::iterator it3;
// one rare and one abund stream per group, keyed "<group>.rare" / "<group>.abund"
888 for (int i=0; i<Groups.size(); i++) {
890 filehandles[Groups[i]+".rare"] = temp;
891 temp2 = new ofstream;
892 filehandles[Groups[i]+".abund"] = temp2;
894 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
895 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
900 m->openInputFile(fastafile, in);
903 if (m->control_pressed) { break; }
905 Sequence seq(in); m->gobble(in);
907 if (seq.getName() != "") {
908 map<string, string>::iterator itNames = nameMap.find(seq.getName());
910 if (itNames == nameMap.end()) {
911 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
913 vector<string> names;
914 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
917 if (rareNames.count(itNames->first) != 0) { //you are a rare name
919 }else{ //you are a abund name
920 rareAbund = ".abund";
// NOTE(review): this loop runs once per entry of names, but the group lookup and
// the print both use seq rather than names[i]; as visible here the same sequence
// would be written names.size() times to the same file - confirm the intent
// (possibly getGroup(names[i])).
923 for (int i = 0; i < names.size(); i++) {
925 string group = groupMap->getGroup(seq.getName());
927 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
928 seq.printSequence(*(filehandles[group+rareAbund]));
929 }else if(group == "not found") {
930 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
// NOTE(review): the streams are closed below but no delete of the ofstream
// objects is visible in this view - confirm they are freed.
938 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
939 (*(filehandles[it3->first])).close();
940 outputNames.push_back(fileroot + tag + it3->first + ".fasta");
948 catch(exception& e) {
949 m->errorOut(e, "SplitAbundCommand", "parseFasta");
953 /**********************************************************************************************************************/