2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
// Builds the list of parameter names that split.abund accepts, from a fixed array.
// Failures are reported through m->errorOut, tagged with this method's name.
13 vector<string> SplitAbundCommand::getValidParameters(){
15 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
// sizeof(Array)/sizeof(string) is the element count, so the vector copies the whole array
16 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
20 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
24 //**********************************************************************************************************************
// Default constructor: registers an empty file-name list under each output type key
// ("list", "name", "accnos", "group", "fasta") in outputTypes.
25 SplitAbundCommand::SplitAbundCommand(){
28 //initialize outputTypes
// one shared empty vector is copied into every entry
29 vector<string> tempOutNames;
30 outputTypes["list"] = tempOutNames;
31 outputTypes["name"] = tempOutNames;
32 outputTypes["accnos"] = tempOutNames;
33 outputTypes["group"] = tempOutNames;
34 outputTypes["fasta"] = tempOutNames;
37 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
41 //**********************************************************************************************************************
// Builds the list of required-parameter entries for split.abund from a fixed array.
// NOTE(review): "or" is not one of the names in getValidParameters; presumably it is a
// marker meaning "list OR name" rather than a real parameter - confirm how callers read it.
42 vector<string> SplitAbundCommand::getRequiredParameters(){
44 string Array[] = {"fasta","list","name","or"};
// sizeof(Array)/sizeof(string) is the element count, so the vector copies the whole array
45 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
49 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
53 //**********************************************************************************************************************
// Declares the list of required files; nothing is added to it in the statements shown here.
// Failures are reported through m->errorOut, tagged with this method's name.
54 vector<string> SplitAbundCommand::getRequiredFiles(){
56 vector<string> myArray;
60 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
64 //**********************************************************************************************************************
// Option-string constructor: parses the user's option string, validates each parameter name,
// resolves input file paths against inputdir, and stores the settings (listfile, namefile,
// fastafile, groupfile, groups, label, accnos, cutoff) used by execute().
// Any validation failure sets abort = true; execute() checks that flag before doing work.
65 SplitAbundCommand::SplitAbundCommand(string option) {
70 //allow user to run help
71 if(option == "help") { help(); abort = true; }
74 //valid parameters for this command
75 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
76 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
78 OptionParser parser(option);
79 map<string, string> parameters = parser.getParameters();
81 ValidParameters validParameter;
82 map<string, string>::iterator it;
84 //check to make sure all parameters are valid for command
85 for (it = parameters.begin(); it != parameters.end(); it++) {
86 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
89 //initialize outputTypes
90 vector<string> tempOutNames;
91 outputTypes["list"] = tempOutNames;
92 outputTypes["name"] = tempOutNames;
93 outputTypes["accnos"] = tempOutNames;
94 outputTypes["group"] = tempOutNames;
95 outputTypes["fasta"] = tempOutNames;
97 //if the user changes the input directory command factory will send this info to us in the output parameter
98 string inputDir = validParameter.validFile(parameters, "inputdir", false);
99 if (inputDir == "not found"){ inputDir = ""; }
// For each input file parameter below: when the given value has no path component,
// prefix it with inputDir; a value that already has a path is left untouched.
102 it = parameters.find("list");
103 //user has given a list file
104 if(it != parameters.end()){
105 path = m->hasPath(it->second);
106 //if the user has not given a path then, add inputdir. else leave path alone.
107 if (path == "") { parameters["list"] = inputDir + it->second; }
110 it = parameters.find("group");
111 //user has given a group file
112 if(it != parameters.end()){
113 path = m->hasPath(it->second);
114 //if the user has not given a path then, add inputdir. else leave path alone.
115 if (path == "") { parameters["group"] = inputDir + it->second; }
118 it = parameters.find("fasta");
119 //user has given a fasta file
120 if(it != parameters.end()){
121 path = m->hasPath(it->second);
122 //if the user has not given a path then, add inputdir. else leave path alone.
123 if (path == "") { parameters["fasta"] = inputDir + it->second; }
126 it = parameters.find("name");
127 //user has given a name file
128 if(it != parameters.end()){
129 path = m->hasPath(it->second);
130 //if the user has not given a path then, add inputdir. else leave path alone.
131 if (path == "") { parameters["name"] = inputDir + it->second; }
137 //if the user changes the output directory command factory will send this info to us in the output parameter
138 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
140 //check for required parameters
// validFile results are compared against the sentinel strings "not open" and "not found";
// "not open" aborts, "not found" is recorded as an empty file name.
141 listfile = validParameter.validFile(parameters, "list", true);
142 if (listfile == "not open") { abort = true; }
143 else if (listfile == "not found") { listfile = ""; }
144 else{ inputFile = listfile; }
// a name file, when given, replaces the list file as inputFile because it is assigned later
146 namefile = validParameter.validFile(parameters, "name", true);
147 if (namefile == "not open") { abort = true; }
148 else if (namefile == "not found") { namefile = ""; }
149 else{ inputFile = namefile; }
// fasta is mandatory: a missing fasta file prints a message and aborts
151 fastafile = validParameter.validFile(parameters, "fasta", true);
152 if (fastafile == "not open") { abort = true; }
153 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
155 groupfile = validParameter.validFile(parameters, "group", true);
156 if (groupfile == "not open") { groupfile = ""; abort = true; }
157 else if (groupfile == "not found") { groupfile = ""; }
// load the group file; readMap() returning 1 is treated as a failure
// (the destructor deletes groupMap only when groupfile is non-empty)
159 groupMap = new GroupMap(groupfile);
161 int error = groupMap->readMap();
162 if (error == 1) { abort = true; }
// groups: "all" selects every group known to the group map; it needs a group file
166 groups = validParameter.validFile(parameters, "groups", false);
167 if (groups == "not found") { groups = ""; }
168 else if (groups == "all") {
169 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
170 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
// splits a dash-separated selection such as A-B-C into Groups
172 m->splitAtDash(groups, Groups);
175 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
177 //do you have all files needed
// NOTE(review): the message below reads "You must either a listfile..." - a verb such as
// "provide" appears to be missing. It is user-facing output, so it is left unchanged here.
178 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
180 //check for optional parameter and set defaults
181 // ...at some point we should add some additional type checking...
// label: no label or label=all processes every label (allLines = 1); otherwise the
// dash-separated labels are stored in labels and allLines = 0
182 label = validParameter.validFile(parameters, "label", false);
183 if (label == "not found") { label = ""; allLines = 1; }
185 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
186 else { allLines = 1; }
// accnos defaults to "F" (false) when not supplied
189 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
190 accnos = m->isTrue(temp);
// cutoff defaults to "0", and a cutoff of 0 is rejected just below, so the user must supply one
192 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
193 convert(temp, cutoff);
195 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
200 catch(exception& e) {
201 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
205 //**********************************************************************************************************************
// Prints the usage text for split.abund through m->mothurOut: the parameters, which ones are
// required, what each one does, and an example invocation.
// NOTE(review): the text is user-facing output and is left unchanged, but two things look off:
//   - the first sentence seems to be missing "and" ("...a names file splits the sequences...")
//   - the parameter list names "cutoff" twice
206 void SplitAbundCommand::help(){
208 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
209 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
210 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
211 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
212 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
213 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
214 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
215 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
216 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
217 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
218 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
219 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
220 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
223 catch(exception& e) {
224 m->errorOut(e, "SplitAbundCommand", "help");
228 //**********************************************************************************************************************
// Destructor: frees groupMap, but only when a group file name is set.
// NOTE(review): this relies on groupMap having been allocated exactly when groupfile is
// non-empty - confirm against the constructor.
229 SplitAbundCommand::~SplitAbundCommand(){
230 if (groupfile != "") { delete groupMap; }
232 //**********************************************************************************************************************
// Runs the command. Does nothing (returns 0) when the constructor set abort.
// With a list file, abundance is determined per label from the list file; otherwise the name
// file is used. Output file names are collected in outputNames and printed at the end.
// Whenever m->control_pressed is seen, the files named in outputNames are removed and 0 is returned.
233 int SplitAbundCommand::execute(){
236 if (abort == true) { return 0; }
238 if (listfile != "") { //you are using a listfile to determine abundance
// default the output directory to the directory of the list file
239 if (outputDir == "") { outputDir = m->hasPath(listfile); }
241 //remove old files so you can append later....
242 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
// no groups selected: a single rare/abund pair of list files
243 if (Groups.size() == 0) {
244 remove((fileroot + "rare.list").c_str());
245 remove((fileroot + "abund.list").c_str());
247 outputNames.push_back((fileroot + "rare.list"));
248 outputNames.push_back((fileroot + "abund.list"));
249 outputTypes["list"].push_back((fileroot + "rare.list"));
250 outputTypes["list"].push_back((fileroot + "abund.list"));
// groups selected: one rare/abund pair of list files per group
252 for (int i=0; i<Groups.size(); i++) {
253 remove((fileroot + Groups[i] + ".rare.list").c_str());
254 remove((fileroot + Groups[i] + ".abund.list").c_str());
256 outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
257 outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
258 outputTypes["list"].push_back((fileroot + Groups[i] + ".rare.list"));
259 outputTypes["list"].push_back((fileroot + Groups[i] + ".abund.list"));
263 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
// processedLabels records labels already handled; userLabels shrinks as requested labels are found
264 set<string> processedLabels;
265 set<string> userLabels = labels;
267 input = new InputData(listfile, "list");
268 list = input->getListVector();
269 string lastLabel = list->getLabel();
271 //do you have a namefile or do we need to simulate one?
272 if (namefile != "") { readNamesFile(); }
273 else { createNameMap(list); }
275 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// walk the list vectors until the file is exhausted or every requested label has been handled
277 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
279 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
// this label is wanted: either all labels are processed or it was requested by the user
281 if(allLines == 1 || labels.count(list->getLabel()) == 1){
283 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
286 processedLabels.insert(list->getLabel());
287 userLabels.erase(list->getLabel());
// a requested label was passed without being found: fall back to the previous label (lastLabel)
290 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
291 string saveLabel = list->getLabel();
294 list = input->getListVector(lastLabel); //get new list vector to process
296 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
299 processedLabels.insert(list->getLabel());
300 userLabels.erase(list->getLabel());
302 //restore real lastlabel to save below
303 list->setLabel(saveLabel);
307 lastLabel = list->getLabel();
310 list = input->getListVector(); //get new list vector to process
313 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
315 //output error messages about any remaining user labels
316 set<string>::iterator it;
317 bool needToRun = false;
318 for (it = userLabels.begin(); it != userLabels.end(); it++) {
319 m->mothurOut("Your file does not include the label " + *it);
// if lastLabel was not processed yet it will be used in place of the missing label
320 if (processedLabels.count(lastLabel) != 1) {
321 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
324 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
329 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
331 //run last label if you need to
332 if (needToRun == true) {
333 if (list != NULL) { delete list; }
334 list = input->getListVector(lastLabel); //get new list vector to process
336 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
344 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
346 }else { //you are using the namefile to determine abundance
// default the output directory to the directory of the name file
347 if (outputDir == "") { outputDir = m->hasPath(namefile); }
// write the optional group and accnos outputs and the fasta output, all using the same tag
353 if (groupfile != "") { parseGroup(tag); }
354 if (accnos) { writeAccnos(tag); }
355 if (fastafile != "") { parseFasta(tag); }
// report every output file that was produced
358 m->mothurOutEndLine();
359 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
360 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
361 m->mothurOutEndLine();
365 catch(exception& e) {
366 m->errorOut(e, "SplitAbundCommand", "execute");
370 /**********************************************************************************************************************/
// Classifies the sequence names of one list vector by bin size and then writes the
// per-label outputs (group, accnos, fasta) tagged with the list's label.
//   thisList - the list vector whose bins are classified
// Returns 0 early if m->control_pressed is set while scanning bins.
371 int SplitAbundCommand::splitList(ListVector* thisList) {
376 //get rareNames and abundNames
377 for (int i = 0; i < thisList->getNumBins(); i++) {
378 if (m->control_pressed) { return 0; }
380 string bin = thisList->get(i);
382 vector<string> names;
383 m->splitAtComma(bin, names); //parses bin into individual sequence names
384 int size = names.size();
// a bin holding at most cutoff names is rare; the second loop is presumably the
// else branch that records abundant names
386 if (size <= cutoff) {
387 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
389 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
// the tag "<label>." becomes part of every output file name for this label
395 string tag = thisList->getLabel() + ".";
396 if (groupfile != "") { parseGroup(tag); }
397 if (accnos) { writeAccnos(tag); }
398 if (fastafile != "") { parseFasta(tag); }
403 catch(exception& e) {
404 m->errorOut(e, "SplitAbundCommand", "splitList");
408 /**********************************************************************************************************************/
// Appends one label's worth of bins to the rare/abund list files.
// Without groups: bins go to <root>rare.list or <root>abund.list by bin size.
// With groups: each bin is split per group and written to <root><group>.rare.list /
// <root><group>.abund.list.
//   thisList - the list vector being written
// NOTE(review): both bin loops read bins through the member `list` (list->get(i)) while the
// loop bound and the label come from the parameter `thisList`; this is only correct if the
// caller always passes the member - confirm, or switch to thisList->get(i).
// NOTE(review): `filehandles` is declared here and again inside the group branch, so the
// inner declaration shadows this one.
409 int SplitAbundCommand::writeList(ListVector* thisList) {
412 map<string, ofstream*> filehandles;
414 if (Groups.size() == 0) {
// NOTE(review): heap allocation; no matching delete appears in the statements shown - verify
415 SAbundVector* sabund = new SAbundVector();
416 *sabund = thisList->getSAbundVector();
418 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
419 // and don't have to store the bins until you are done with the whole vector, this saves a lot of space.
// sum the sabund counts for ranks 0..cutoff; everything else is abundant
421 for (int i = 0; i <= sabund->getMaxRank(); i++) {
422 if (i > cutoff) { break; }
423 numRareBins += sabund->get(i);
425 int numAbundBins = thisList->getNumBins() - numRareBins;
// both files are opened in append mode so successive labels accumulate in the same file
431 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + "rare.list";
432 m->openOutputFileAppend(rare, rout);
433 //outputNames.push_back(rare);
435 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + "abund.list";
436 m->openOutputFileAppend(abund, aout);
437 //outputNames.push_back(abund);
// line prefix: label, tab, number of bins; skipped when there are no names of that kind
439 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
440 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
442 for (int i = 0; i < thisList->getNumBins(); i++) {
443 if (m->control_pressed) { break; }
445 string bin = list->get(i);
447 int size = m->getNumNames(bin);
// same rule as splitList: at most cutoff names means rare
449 if (size <= cutoff) { rout << bin << '\t'; }
450 else { aout << bin << '\t'; }
453 if (rareNames.size() != 0) { rout << endl; }
454 if (abundNames.size() != 0) { aout << endl; }
459 }else{ //parse names by abundance and group
460 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
463 //map<string, bool> wroteFile;
// keys are "<group>.rare" / "<group>.abund"; values are heap-allocated output streams
// NOTE(review): no delete of these streams appears in the statements shown - verify
464 map<string, ofstream*> filehandles;
465 map<string, ofstream*>::iterator it3;
467 for (int i=0; i<Groups.size(); i++) {
469 filehandles[Groups[i]+".rare"] = temp;
470 temp2 = new ofstream;
471 filehandles[Groups[i]+".abund"] = temp2;
473 m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
474 m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
// groupVector accumulates the tab-separated bins per output file; groupNumBins counts them
477 map<string, string> groupVector;
478 map<string, string>::iterator itGroup;
479 map<string, int> groupNumBins;
481 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
482 groupNumBins[it3->first] = 0;
483 groupVector[it3->first] = "";
486 for (int i = 0; i < thisList->getNumBins(); i++) {
487 if (m->control_pressed) { break; }
// groupBins holds this one bin split into per-group comma-separated name lists
489 map<string, string> groupBins;
490 string bin = list->get(i);
492 vector<string> names;
493 m->splitAtComma(bin, names); //parses bin into individual sequence names
495 //parse bin into list of sequences in each group
496 for (int j = 0; j < names.size(); j++) {
498 if (rareNames.count(names[j]) != 0) { //you are a rare name
500 }else{ //you are an abund name
501 rareAbund = ".abund";
504 string group = groupMap->getGroup(names[j]);
506 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
507 itGroup = groupBins.find(group+rareAbund);
// first name for this group in this bin starts a new bin and bumps the bin count
508 if(itGroup == groupBins.end()) {
509 groupBins[group+rareAbund] = names[j]; //add first name
510 groupNumBins[group+rareAbund]++;
511 }else{ //add another name
512 groupBins[group+rareAbund] += "," + names[j];
514 }else if(group == "not found") {
515 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
// append this bin's per-group pieces to the running per-file line
520 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
521 groupVector[itGroup->first] += itGroup->second + '\t';
// one line per file: label, number of bins, then the bins; the stream is then closed
526 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
527 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
528 (*(filehandles[it3->first])).close();
536 catch(exception& e) {
537 m->errorOut(e, "SplitAbundCommand", "writeList");
541 /**********************************************************************************************************************/
// Reads the name file, records each row in nameMap, and classifies the first-column name as
// rare or abundant by the number of names in the second column.
// Reading stops early when m->control_pressed is set.
542 int SplitAbundCommand::splitNames() { //namefile
550 m->openInputFile(namefile, in);
553 if (m->control_pressed) { break; }
// each row: first column is the representative name, second column is the list of names
555 string firstCol, secondCol;
556 in >> firstCol >> secondCol; m->gobble(in);
558 nameMap[firstCol] = secondCol;
560 int size = m->getNumNames(secondCol);
// at most cutoff names means rare; the abundNames insert is presumably the else branch
562 if (size <= cutoff) {
563 rareNames.insert(firstCol);
565 abundNames.insert(firstCol);
573 catch(exception& e) {
574 m->errorOut(e, "SplitAbundCommand", "splitNames");
578 /**********************************************************************************************************************/
// Loads the name file into nameMap (first column -> second column) without classifying
// anything as rare or abundant. Reading stops early when m->control_pressed is set.
579 int SplitAbundCommand::readNamesFile() {
583 m->openInputFile(namefile, in);
586 if (m->control_pressed) { break; }
588 string firstCol, secondCol;
589 in >> firstCol >> secondCol; m->gobble(in);
// a later row with the same first column overwrites the earlier entry
591 nameMap[firstCol] = secondCol;
598 catch(exception& e) {
599 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
603 /**********************************************************************************************************************/
// Fills nameMap from a list vector when no name file is available: every sequence name found
// in any bin is mapped to itself.
//   thisList - list vector to read names from; a NULL pointer is tolerated and skipped
// Returns 0 early if m->control_pressed is set while scanning bins.
604 int SplitAbundCommand::createNameMap(ListVector* thisList) {
607 if (thisList != NULL) {
608 for (int i = 0; i < thisList->getNumBins(); i++) {
609 if (m->control_pressed) { return 0; }
611 string bin = thisList->get(i);
613 vector<string> names;
614 m->splitAtComma(bin, names); //parses bin into individual sequence names
// identity mapping: each name stands for itself
616 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
622 catch(exception& e) {
623 m->errorOut(e, "SplitAbundCommand", "createNameMap");
627 /**********************************************************************************************************************/
// Writes the rare and abundant name files from rareNames, abundNames and nameMap.
// Without groups: <root>rare.names and <root>abund.names.
// With groups: <root><group>.rare.names and <root><group>.abund.names for each selected group.
// Every file written is added to outputNames and outputTypes["name"].
// NOTE(review): `filehandles` is declared here and again inside the group branch, so the
// inner declaration shadows this one.
628 int SplitAbundCommand::writeNames() { //namefile
631 map<string, ofstream*> filehandles;
633 if (Groups.size() == 0) {
637 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
638 m->openOutputFile(rare, rout);
639 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
641 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
642 m->openOutputFile(abund, aout);
643 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
// each output row: name, tab, the nameMap entry for that name
645 if (rareNames.size() != 0) {
646 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
647 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
652 if (abundNames.size() != 0) {
653 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
654 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
659 }else{ //parse names by abundance and group
660 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
// keys are "<group>.rare" / "<group>.abund"; values are heap-allocated output streams
// NOTE(review): no delete of these streams appears in the statements shown - verify
663 map<string, ofstream*> filehandles;
664 map<string, ofstream*>::iterator it3;
666 for (int i=0; i<Groups.size(); i++) {
668 filehandles[Groups[i]+".rare"] = temp;
669 temp2 = new ofstream;
670 filehandles[Groups[i]+".abund"] = temp2;
672 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
673 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
676 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
677 vector<string> names;
678 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
// rare/abund is decided by the map key (first column), not by the individual names
681 if (rareNames.count(itName->first) != 0) { //you are a rare name
683 }else{ //you are an abund name
684 rareAbund = ".abund";
// outputStrings collects one output row per "<group><rareAbund>" key for this nameMap entry
687 map<string, string> outputStrings;
688 map<string, string>::iterator itout;
689 for (int i = 0; i < names.size(); i++) {
691 string group = groupMap->getGroup(names[i]);
693 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
694 itout = outputStrings.find(group+rareAbund);
// the first name seen for a group is written as both columns of that group's row;
// later names are appended to the second column with commas
695 if (itout == outputStrings.end()) {
696 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
697 }else { outputStrings[group+rareAbund] += "," + names[i]; }
698 }else if(group == "not found") {
699 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
703 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
// close every stream and record the file it wrote
707 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
708 (*(filehandles[it3->first])).close();
709 outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names");
717 catch(exception& e) {
718 m->errorOut(e, "SplitAbundCommand", "writeNames");
722 /**********************************************************************************************************************/
723 //just write the unique names - if a namesfile is given
// Writes accnos files (one name per line) listing the entries of rareNames and abundNames.
//   tag - text inserted into each output file name (callers pass "<label>." for list input)
// Without groups: <root><tag>rare.accnos and <root><tag>abund.accnos.
// With groups: <root><tag><group>.rare.accnos and <root><tag><group>.abund.accnos.
// Every file written is added to outputNames and outputTypes["accnos"].
// NOTE(review): `filehandles` is declared here and again inside the group branch, so the
// inner declaration shadows this one.
724 int SplitAbundCommand::writeAccnos(string tag) {
727 map<string, ofstream*> filehandles;
729 if (Groups.size() == 0) {
734 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
735 m->openOutputFile(rare, rout);
736 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
738 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
739 rout << (*itRare) << endl;
743 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
744 m->openOutputFile(abund, aout);
745 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
747 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
748 aout << (*itAbund) << endl;
752 }else{ //parse names by abundance and group
753 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
// keys are "<group>.rare" / "<group>.abund"; values are heap-allocated output streams
// NOTE(review): no delete of these streams appears in the statements shown - verify
756 map<string, ofstream*> filehandles;
757 map<string, ofstream*>::iterator it3;
759 for (int i=0; i<Groups.size(); i++) {
761 filehandles[Groups[i]+".rare"] = temp;
762 temp2 = new ofstream;
763 filehandles[Groups[i]+".abund"] = temp2;
765 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
766 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
// route each rare name to the file of the group it belongs to
770 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
771 string group = groupMap->getGroup(*itRare);
773 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
774 *(filehandles[group+".rare"]) << *itRare << endl;
// same routing for abundant names
779 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
780 string group = groupMap->getGroup(*itAbund);
782 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
783 *(filehandles[group+".abund"]) << *itAbund << endl;
// close every stream and record the file it wrote
788 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
789 (*(filehandles[it3->first])).close();
790 outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
798 catch(exception& e) {
799 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
803 /**********************************************************************************************************************/
// Splits the group file into rare and abundant group files, using nameMap to expand each
// representative name into its sequence names and groupMap to look up each name's group.
//   tag - text inserted into each output file name (callers pass "<label>." for list input)
// Without groups: <root><tag>rare.groups and <root><tag>abund.groups.
// With groups: <root><tag><group>.rare.groups and <root><tag><group>.abund.groups.
// Every file written is added to outputNames and outputTypes["group"].
// NOTE(review): the errorOut call at the end reports the method as "parseGroups", which does
// not match this method's name (parseGroup); it is a runtime string, so it is left unchanged.
// NOTE(review): `filehandles` is declared here and again inside the group branch, so the
// inner declaration shadows this one.
804 int SplitAbundCommand::parseGroup(string tag) { //namefile
807 map<string, ofstream*> filehandles;
809 if (Groups.size() == 0) {
813 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
814 m->openOutputFile(rare, rout);
815 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
817 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
818 m->openOutputFile(abund, aout);
819 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
821 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
822 vector<string> names;
823 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
825 for (int i = 0; i < names.size(); i++) {
827 string group = groupMap->getGroup(names[i]);
// names missing from the group file are reported; others are written as "name<TAB>group"
829 if (group == "not found") {
830 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
// rare/abund is decided by the map key (first column), not by the individual name
832 if (rareNames.count(itName->first) != 0) { //you are a rare name
833 rout << names[i] << '\t' << group << endl;
834 }else{ //you are an abund name
835 aout << names[i] << '\t' << group << endl;
844 }else{ //parse names by abundance and group
845 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
// keys are "<group>.rare" / "<group>.abund"; values are heap-allocated output streams
// NOTE(review): no delete of these streams appears in the statements shown - verify
848 map<string, ofstream*> filehandles;
849 map<string, ofstream*>::iterator it3;
851 for (int i=0; i<Groups.size(); i++) {
853 filehandles[Groups[i]+".rare"] = temp;
854 temp2 = new ofstream;
855 filehandles[Groups[i]+".abund"] = temp2;
857 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
858 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
861 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
862 vector<string> names;
863 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
866 if (rareNames.count(itName->first) != 0) { //you are a rare name
868 }else{ //you are an abund name
869 rareAbund = ".abund";
// write each name to the file for its own group and the entry's rare/abund class
872 for (int i = 0; i < names.size(); i++) {
874 string group = groupMap->getGroup(names[i]);
876 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
877 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
// close every stream and record the file it wrote
882 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
883 (*(filehandles[it3->first])).close();
884 outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
892 catch(exception& e) {
893 m->errorOut(e, "SplitAbundCommand", "parseGroups");
897 /**********************************************************************************************************************/
// Splits the fasta file into rare and abundant fasta files according to rareNames.
//   tag - text inserted into each output file name (callers pass "<label>." for list input)
// Without groups: <root><tag>rare.fasta and <root><tag>abund.fasta.
// With groups: <root><tag><group>.rare.fasta and <root><tag><group>.abund.fasta.
// Sequences whose name is not a key of nameMap are reported and skipped.
// Every file written is added to outputNames and outputTypes["fasta"].
// NOTE(review): `filehandles` is declared here and again inside the group branch, so the
// inner declaration shadows this one.
898 int SplitAbundCommand::parseFasta(string tag) { //namefile
901 map<string, ofstream*> filehandles;
903 if (Groups.size() == 0) {
907 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
908 m->openOutputFile(rare, rout);
909 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
911 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
912 m->openOutputFile(abund, aout);
913 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
917 m->openInputFile(fastafile, in);
920 if (m->control_pressed) { break; }
922 Sequence seq(in); m->gobble(in);
// an empty name means no sequence was read; such records are skipped
924 if (seq.getName() != "") {
926 map<string, string>::iterator itNames;
928 itNames = nameMap.find(seq.getName());
930 if (itNames == nameMap.end()) {
931 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
933 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
934 seq.printSequence(rout);
935 }else{ //you are an abund name
936 seq.printSequence(aout);
945 }else{ //parse names by abundance and group
946 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
// keys are "<group>.rare" / "<group>.abund"; values are heap-allocated output streams
// NOTE(review): no delete of these streams appears in the statements shown - verify
949 map<string, ofstream*> filehandles;
950 map<string, ofstream*>::iterator it3;
952 for (int i=0; i<Groups.size(); i++) {
954 filehandles[Groups[i]+".rare"] = temp;
955 temp2 = new ofstream;
956 filehandles[Groups[i]+".abund"] = temp2;
958 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
959 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
964 m->openInputFile(fastafile, in);
967 if (m->control_pressed) { break; }
969 Sequence seq(in); m->gobble(in);
971 if (seq.getName() != "") {
972 map<string, string>::iterator itNames = nameMap.find(seq.getName());
974 if (itNames == nameMap.end()) {
975 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
977 vector<string> names;
978 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
981 if (rareNames.count(itNames->first) != 0) { //you are a rare name
983 }else{ //you are an abund name
984 rareAbund = ".abund";
// NOTE(review): this loop runs once per name in the entry, but the statements shown look up
// the group of seq.getName() every time and never use names[i]; as written the same sequence
// would be printed names.size() times to the same file. Presumably the lookup should use
// names[i] - confirm the intended behavior.
987 for (int i = 0; i < names.size(); i++) {
989 string group = groupMap->getGroup(seq.getName());
991 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
992 seq.printSequence(*(filehandles[group+rareAbund]));
993 }else if(group == "not found") {
994 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
// close every stream and record the file it wrote
1002 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1003 (*(filehandles[it3->first])).close();
1004 outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1012 catch(exception& e) {
1013 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1017 /**********************************************************************************************************************/