2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option) {
18 //allow user to run help
19 if(option == "help") { help(); abort = true; }
22 //valid paramters for this command
23 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26 OptionParser parser(option);
27 map<string, string> parameters = parser.getParameters();
29 ValidParameters validParameter;
30 map<string, string>::iterator it;
32 //check to make sure all parameters are valid for command
33 for (it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the input directory command factory will send this info to us in the output parameter
38 string inputDir = validParameter.validFile(parameters, "inputdir", false);
39 if (inputDir == "not found"){ inputDir = ""; }
42 it = parameters.find("list");
43 //user has given a template file
44 if(it != parameters.end()){
45 path = hasPath(it->second);
46 //if the user has not given a path then, add inputdir. else leave path alone.
47 if (path == "") { parameters["list"] = inputDir + it->second; }
50 it = parameters.find("group");
51 //user has given a template file
52 if(it != parameters.end()){
53 path = hasPath(it->second);
54 //if the user has not given a path then, add inputdir. else leave path alone.
55 if (path == "") { parameters["group"] = inputDir + it->second; }
58 it = parameters.find("fasta");
59 //user has given a template file
60 if(it != parameters.end()){
61 path = hasPath(it->second);
62 //if the user has not given a path then, add inputdir. else leave path alone.
63 if (path == "") { parameters["fasta"] = inputDir + it->second; }
66 it = parameters.find("name");
67 //user has given a template file
68 if(it != parameters.end()){
69 path = hasPath(it->second);
70 //if the user has not given a path then, add inputdir. else leave path alone.
71 if (path == "") { parameters["name"] = inputDir + it->second; }
77 //if the user changes the output directory command factory will send this info to us in the output parameter
78 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
80 //check for required parameters
81 listfile = validParameter.validFile(parameters, "list", true);
82 if (listfile == "not open") { abort = true; }
83 else if (listfile == "not found") { listfile = ""; }
84 else{ inputFile = listfile; }
86 namefile = validParameter.validFile(parameters, "name", true);
87 if (namefile == "not open") { abort = true; }
88 else if (namefile == "not found") { namefile = ""; }
89 else{ inputFile = namefile; }
91 fastafile = validParameter.validFile(parameters, "fasta", true);
92 if (fastafile == "not open") { abort = true; }
93 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
95 groupfile = validParameter.validFile(parameters, "group", true);
96 if (groupfile == "not open") { groupfile = ""; abort = true; }
97 else if (groupfile == "not found") { groupfile = ""; }
99 groupMap = new GroupMap(groupfile);
101 int error = groupMap->readMap();
102 if (error == 1) { abort = true; }
106 groups = validParameter.validFile(parameters, "groups", false);
107 if (groups == "not found") { groups = ""; }
108 else if (groups == "all") {
109 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
110 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
112 splitAtDash(groups, Groups);
115 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
117 //do you have all files needed
118 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
120 //check for optional parameter and set defaults
121 // ...at some point should added some additional type checking...
122 label = validParameter.validFile(parameters, "label", false);
123 if (label == "not found") { label = ""; allLines = 1; }
125 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
126 else { allLines = 1; }
129 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
130 accnos = isTrue(temp);
132 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
133 convert(temp, cutoff);
135 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
140 catch(exception& e) {
141 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
148 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label, groups and accnos.\n");
150 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
156 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
157 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
158 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
163 catch(exception& e) {
164 m->errorOut(e, "SplitAbundCommand", "help");
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){
170 if (groupfile != "") { delete groupMap; }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
176 if (abort == true) { return 0; }
178 if (listfile != "") { //you are using a listfile to determine abundance
180 //remove old files so you can append later....
181 string fileroot = outputDir + getRootName(getSimpleName(listfile));
182 if (Groups.size() == 0) {
183 remove((fileroot + "rare.list").c_str());
184 remove((fileroot + "abund.list").c_str());
186 wroteListFile["rare"] = false;
187 wroteListFile["abund"] = false;
189 for (int i=0; i<Groups.size(); i++) {
190 remove((fileroot + Groups[i] + ".rare.list").c_str());
191 remove((fileroot + Groups[i] + ".abund.list").c_str());
193 wroteListFile[(Groups[i] + ".rare")] = false;
194 wroteListFile[(Groups[i] + ".abund")] = false;
198 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199 set<string> processedLabels;
200 set<string> userLabels = labels;
202 input = new InputData(listfile, "list");
203 list = input->getListVector();
204 string lastLabel = list->getLabel();
206 //do you have a namefile or do we need to similate one?
207 if (namefile != "") { readNamesFile(); }
208 else { createNameMap(list); }
210 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
212 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
214 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
216 if(allLines == 1 || labels.count(list->getLabel()) == 1){
218 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
221 processedLabels.insert(list->getLabel());
222 userLabels.erase(list->getLabel());
225 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
226 string saveLabel = list->getLabel();
229 list = input->getListVector(lastLabel); //get new list vector to process
231 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
234 processedLabels.insert(list->getLabel());
235 userLabels.erase(list->getLabel());
237 //restore real lastlabel to save below
238 list->setLabel(saveLabel);
242 lastLabel = list->getLabel();
245 list = input->getListVector(); //get new list vector to process
248 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
250 //output error messages about any remaining user labels
251 set<string>::iterator it;
252 bool needToRun = false;
253 for (it = userLabels.begin(); it != userLabels.end(); it++) {
254 m->mothurOut("Your file does not include the label " + *it);
255 if (processedLabels.count(lastLabel) != 1) {
256 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
259 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
264 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
266 //run last label if you need to
267 if (needToRun == true) {
268 if (list != NULL) { delete list; }
269 list = input->getListVector(lastLabel); //get new list vector to process
271 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
279 for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
280 string filename = fileroot + itBool->first;
281 if ((itBool->first == "rare") || (itBool->first == "abund")) {
282 filename = fileroot + itBool->first + ".list";
284 if (itBool->second) { //we wrote to this file
285 outputNames.push_back(filename);
287 remove(filename.c_str());
291 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
294 }else { //you are using the namefile to determine abundance
300 if (groupfile != "") { parseGroup(tag); }
301 if (accnos) { writeAccnos(tag); }
302 if (fastafile != "") { parseFasta(tag); }
305 m->mothurOutEndLine();
306 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
307 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
308 m->mothurOutEndLine();
312 catch(exception& e) {
313 m->errorOut(e, "SplitAbundCommand", "execute");
317 /**********************************************************************************************************************/
318 int SplitAbundCommand::splitList(ListVector* thisList) {
323 //get rareNames and abundNames
324 for (int i = 0; i < thisList->getNumBins(); i++) {
325 if (m->control_pressed) { return 0; }
327 string bin = thisList->get(i);
329 vector<string> names;
330 splitAtComma(bin, names); //parses bin into individual sequence names
331 int size = names.size();
333 if (size <= cutoff) {
334 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
336 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
342 string tag = thisList->getLabel() + ".";
343 if (groupfile != "") { parseGroup(tag); }
344 if (accnos) { writeAccnos(tag); }
345 if (fastafile != "") { parseFasta(tag); }
350 catch(exception& e) {
351 m->errorOut(e, "SplitAbundCommand", "splitList");
355 /**********************************************************************************************************************/
356 int SplitAbundCommand::writeList(ListVector* thisList) {
359 map<string, ofstream*> filehandles;
361 if (Groups.size() == 0) {
362 SAbundVector* sabund = new SAbundVector();
363 *sabund = thisList->getSAbundVector();
365 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
366 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
368 for (int i = 0; i <= sabund->getMaxRank(); i++) {
369 if (i > cutoff) { break; }
370 numRareBins += sabund->get(i);
372 int numAbundBins = thisList->getNumBins() - numRareBins;
378 if (rareNames.size() != 0) {
379 string rare = outputDir + getRootName(getSimpleName(listfile)) + "rare.list";
380 wroteListFile["rare"] = true;
381 openOutputFileAppend(rare, rout);
382 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
385 if (abundNames.size() != 0) {
386 string abund = outputDir + getRootName(getSimpleName(listfile)) + "abund.list";
387 wroteListFile["abund"] = true;
388 openOutputFileAppend(abund, aout);
389 aout << thisList->getLabel() << '\t' << numAbundBins << '\t';
392 for (int i = 0; i < thisList->getNumBins(); i++) {
393 if (m->control_pressed) { break; }
395 string bin = list->get(i);
397 int size = getNumNames(bin);
399 if (size <= cutoff) { rout << bin << '\t'; }
400 else { aout << bin << '\t'; }
403 if (rareNames.size() != 0) { rout << endl; rout.close(); }
404 if (abundNames.size() != 0) { aout << endl; aout.close(); }
406 }else{ //parse names by abundance and group
407 string fileroot = outputDir + getRootName(getSimpleName(listfile));
410 map<string, bool> wroteFile;
411 map<string, ofstream*> filehandles;
412 map<string, ofstream*>::iterator it3;
414 for (int i=0; i<Groups.size(); i++) {
416 filehandles[Groups[i]+".rare"] = temp;
417 temp2 = new ofstream;
418 filehandles[Groups[i]+".abund"] = temp2;
420 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
421 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
424 map<string, string> groupVector;
425 map<string, string>::iterator itGroup;
426 map<string, int> groupNumBins;
428 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
429 groupNumBins[it3->first] = 0;
430 groupVector[it3->first] = "";
433 for (int i = 0; i < thisList->getNumBins(); i++) {
434 if (m->control_pressed) { break; }
436 map<string, string> groupBins;
437 string bin = list->get(i);
439 vector<string> names;
440 splitAtComma(bin, names); //parses bin into individual sequence names
442 //parse bin into list of sequences in each group
443 for (int j = 0; j < names.size(); j++) {
445 if (rareNames.count(names[j]) != 0) { //you are a rare name
447 }else{ //you are a abund name
448 rareAbund = ".abund";
451 string group = groupMap->getGroup(names[j]);
453 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
454 itGroup = groupBins.find(group+rareAbund);
455 if(itGroup == groupBins.end()) {
456 groupBins[group+rareAbund] = names[j]; //add first name
457 groupNumBins[group+rareAbund]++;
458 }else{ //add another name
459 groupBins[group+rareAbund] += "," + names[j];
461 }else if(group == "not found") {
462 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
467 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
468 groupVector[itGroup->first] += itGroup->second + '\t';
473 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
474 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
475 wroteListFile[it3->first] = true;
476 (*(filehandles[it3->first])).close();
484 catch(exception& e) {
485 m->errorOut(e, "SplitAbundCommand", "writeList");
489 /**********************************************************************************************************************/
490 int SplitAbundCommand::splitNames() { //namefile
498 openInputFile(namefile, in);
501 if (m->control_pressed) { break; }
503 string firstCol, secondCol;
504 in >> firstCol >> secondCol; gobble(in);
506 nameMap[firstCol] = secondCol;
508 int size = getNumNames(secondCol);
510 if (size <= cutoff) {
511 rareNames.insert(firstCol);
513 abundNames.insert(firstCol);
521 catch(exception& e) {
522 m->errorOut(e, "SplitAbundCommand", "splitNames");
526 /**********************************************************************************************************************/
527 int SplitAbundCommand::readNamesFile() {
531 openInputFile(namefile, in);
534 if (m->control_pressed) { break; }
536 string firstCol, secondCol;
537 in >> firstCol >> secondCol; gobble(in);
539 nameMap[firstCol] = secondCol;
546 catch(exception& e) {
547 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
551 /**********************************************************************************************************************/
552 int SplitAbundCommand::createNameMap(ListVector* thisList) {
555 if (thisList != NULL) {
556 for (int i = 0; i < thisList->getNumBins(); i++) {
557 if (m->control_pressed) { return 0; }
559 string bin = thisList->get(i);
561 vector<string> names;
562 splitAtComma(bin, names); //parses bin into individual sequence names
564 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
570 catch(exception& e) {
571 m->errorOut(e, "SplitAbundCommand", "createNameMap");
575 /**********************************************************************************************************************/
576 int SplitAbundCommand::writeNames() { //namefile
579 map<string, ofstream*> filehandles;
581 if (Groups.size() == 0) {
585 if (rareNames.size() != 0) {
586 string rare = outputDir + getRootName(getSimpleName(namefile)) + "rare.names";
587 openOutputFile(rare, rout);
588 outputNames.push_back(rare);
590 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
591 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
596 if (abundNames.size() != 0) {
597 string abund = outputDir + getRootName(getSimpleName(namefile)) + "abund.names";
598 openOutputFile(abund, aout);
599 outputNames.push_back(abund);
601 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
602 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
607 }else{ //parse names by abundance and group
608 string fileroot = outputDir + getRootName(getSimpleName(namefile));
611 map<string, bool> wroteFile;
612 map<string, ofstream*> filehandles;
613 map<string, ofstream*>::iterator it3;
615 for (int i=0; i<Groups.size(); i++) {
617 filehandles[Groups[i]+".rare"] = temp;
618 temp2 = new ofstream;
619 filehandles[Groups[i]+".abund"] = temp2;
621 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
622 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
624 wroteFile[Groups[i] + ".rare"] = false;
625 wroteFile[Groups[i] + ".abund"] = false;
628 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
629 vector<string> names;
630 splitAtComma(itName->second, names); //parses bin into individual sequence names
633 if (rareNames.count(itName->first) != 0) { //you are a rare name
635 }else{ //you are a abund name
636 rareAbund = ".abund";
639 map<string, string> outputStrings;
640 map<string, string>::iterator itout;
641 for (int i = 0; i < names.size(); i++) {
643 string group = groupMap->getGroup(names[i]);
645 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
646 itout = outputStrings.find(group+rareAbund);
647 if (itout == outputStrings.end()) {
648 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
649 }else { outputStrings[group+rareAbund] += "," + names[i]; }
650 }else if(group == "not found") {
651 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
655 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) {
656 *(filehandles[itout->first]) << itout->second << endl;
657 wroteFile[itout->first] = true;
662 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
663 (*(filehandles[it3->first])).close();
664 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + it3->first + ".names"); }
665 else { remove((it3->first).c_str()); }
673 catch(exception& e) {
674 m->errorOut(e, "SplitAbundCommand", "writeNames");
678 /**********************************************************************************************************************/
679 //just write the unique names - if a namesfile is given
680 int SplitAbundCommand::writeAccnos(string tag) {
683 map<string, ofstream*> filehandles;
685 if (Groups.size() == 0) {
689 if (rareNames.size() != 0) {
690 string rare = outputDir + getRootName(getSimpleName(inputFile)) + tag + "rare.accnos";
691 openOutputFile(rare, rout);
692 outputNames.push_back(rare);
694 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
695 rout << (*itRare) << endl;
700 if (abundNames.size() != 0) {
701 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag + "abund.accnos";
702 openOutputFile(abund, aout);
703 outputNames.push_back(abund);
705 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
706 aout << (*itAbund) << endl;
710 }else{ //parse names by abundance and group
711 string fileroot = outputDir + getRootName(getSimpleName(inputFile));
714 map<string, bool> wroteFile;
715 map<string, ofstream*> filehandles;
716 map<string, ofstream*>::iterator it3;
718 for (int i=0; i<Groups.size(); i++) {
720 filehandles[Groups[i]+".rare"] = temp;
721 temp2 = new ofstream;
722 filehandles[Groups[i]+".abund"] = temp2;
724 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
725 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
727 wroteFile[Groups[i] + ".rare"] = false;
728 wroteFile[Groups[i] + ".abund"] = false;
732 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
733 string group = groupMap->getGroup(*itRare);
735 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
736 *(filehandles[group+".rare"]) << *itRare << endl;
737 wroteFile[group+".rare"] = true;
742 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
743 string group = groupMap->getGroup(*itAbund);
745 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
746 *(filehandles[group+".abund"]) << *itAbund << endl;
747 wroteFile[group+".abund"] = true;
752 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
753 (*(filehandles[it3->first])).close();
754 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".accnos"); }
755 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
763 catch(exception& e) {
764 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
768 /**********************************************************************************************************************/
769 int SplitAbundCommand::parseGroup(string tag) { //namefile
772 map<string, ofstream*> filehandles;
774 if (Groups.size() == 0) {
778 if (rareNames.size() != 0) {
779 string rare = outputDir + getRootName(getSimpleName(groupfile)) + tag + "rare.groups";
780 openOutputFile(rare, rout);
781 outputNames.push_back(rare);
784 if (abundNames.size() != 0) {
785 string abund = outputDir + getRootName(getSimpleName(groupfile)) + tag + "abund.groups";
786 openOutputFile(abund, aout);
787 outputNames.push_back(abund);
791 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
792 vector<string> names;
793 splitAtComma(itName->second, names); //parses bin into individual sequence names
795 for (int i = 0; i < names.size(); i++) {
797 string group = groupMap->getGroup(names[i]);
799 if (group == "not found") {
800 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
802 if (rareNames.count(itName->first) != 0) { //you are a rare name
803 rout << names[i] << '\t' << group << endl;
804 }else{ //you are a abund name
805 rout << names[i] << '\t' << group << endl;
811 if (rareNames.size() != 0) { rout.close(); }
812 if (abundNames.size() != 0) { aout.close(); }
814 }else{ //parse names by abundance and group
815 string fileroot = outputDir + getRootName(getSimpleName(groupfile));
818 map<string, bool> wroteFile;
819 map<string, ofstream*> filehandles;
820 map<string, ofstream*>::iterator it3;
822 for (int i=0; i<Groups.size(); i++) {
824 filehandles[Groups[i]+".rare"] = temp;
825 temp2 = new ofstream;
826 filehandles[Groups[i]+".abund"] = temp2;
828 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
829 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
831 wroteFile[Groups[i] + ".rare"] = false;
832 wroteFile[Groups[i] + ".abund"] = false;
835 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
836 vector<string> names;
837 splitAtComma(itName->second, names); //parses bin into individual sequence names
840 if (rareNames.count(itName->first) != 0) { //you are a rare name
842 }else{ //you are a abund name
843 rareAbund = ".abund";
846 for (int i = 0; i < names.size(); i++) {
848 string group = groupMap->getGroup(names[i]);
850 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
851 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
852 wroteFile[group+rareAbund] = true;
857 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
858 (*(filehandles[it3->first])).close();
859 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".groups"); }
860 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
868 catch(exception& e) {
869 m->errorOut(e, "SplitAbundCommand", "parseGroups");
873 /**********************************************************************************************************************/
874 int SplitAbundCommand::parseFasta(string tag) { //namefile
877 map<string, ofstream*> filehandles;
879 if (Groups.size() == 0) {
883 if (rareNames.size() != 0) {
884 string rare = outputDir + getRootName(getSimpleName(fastafile)) + tag + "rare.fasta";
885 openOutputFile(rare, rout);
886 outputNames.push_back(rare);
889 if (abundNames.size() != 0) {
890 string abund = outputDir + getRootName(getSimpleName(fastafile)) + tag + "abund.fasta";
891 openOutputFile(abund, aout);
892 outputNames.push_back(abund);
898 openInputFile(fastafile, in);
901 if (m->control_pressed) { break; }
903 Sequence seq(in); gobble(in);
905 if (seq.getName() != "") {
907 map<string, string>::iterator itNames;
909 itNames = nameMap.find(seq.getName());
911 if (itNames == nameMap.end()) {
912 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
914 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
915 seq.printSequence(rout);
916 }else{ //you are a abund name
917 seq.printSequence(aout);
923 if (rareNames.size() != 0) { rout.close(); }
924 if (abundNames.size() != 0) { aout.close(); }
926 }else{ //parse names by abundance and group
927 string fileroot = outputDir + getRootName(getSimpleName(fastafile));
930 map<string, bool> wroteFile;
931 map<string, ofstream*> filehandles;
932 map<string, ofstream*>::iterator it3;
934 for (int i=0; i<Groups.size(); i++) {
936 filehandles[Groups[i]+".rare"] = temp;
937 temp2 = new ofstream;
938 filehandles[Groups[i]+".abund"] = temp2;
940 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
941 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
943 wroteFile[Groups[i] + ".rare"] = false;
944 wroteFile[Groups[i] + ".abund"] = false;
949 openInputFile(fastafile, in);
952 if (m->control_pressed) { break; }
954 Sequence seq(in); gobble(in);
956 if (seq.getName() != "") {
957 map<string, string>::iterator itNames = nameMap.find(seq.getName());
959 if (itNames == nameMap.end()) {
960 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
962 vector<string> names;
963 splitAtComma(itNames->second, names); //parses bin into individual sequence names
966 if (rareNames.count(itNames->first) != 0) { //you are a rare name
968 }else{ //you are a abund name
969 rareAbund = ".abund";
972 for (int i = 0; i < names.size(); i++) {
974 string group = groupMap->getGroup(seq.getName());
976 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
977 seq.printSequence(*(filehandles[group+rareAbund]));
978 wroteFile[group+rareAbund] = true;
979 }else if(group == "not found") {
980 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
988 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
989 (*(filehandles[it3->first])).close();
990 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".fasta"); }
991 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
999 catch(exception& e) {
1000 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1004 /**********************************************************************************************************************/