/*
 *  splitabundcommand.cpp
 *
 *  Created by westcott on 5/17/10.
 *  Copyright 2010 Schloss Lab. All rights reserved.
 *
 */
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option) {
18 //allow user to run help
19 if(option == "help") { help(); abort = true; }
22 //valid paramters for this command
23 string Array[] = {"name","group","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //"list",
24 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26 OptionParser parser(option);
27 map<string, string> parameters = parser.getParameters();
29 ValidParameters validParameter;
30 map<string, string>::iterator it;
32 //check to make sure all parameters are valid for command
33 for (it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the input directory command factory will send this info to us in the output parameter
38 string inputDir = validParameter.validFile(parameters, "inputdir", false);
39 if (inputDir == "not found"){ inputDir = ""; }
42 it = parameters.find("list");
43 //user has given a template file
44 if(it != parameters.end()){
45 path = hasPath(it->second);
46 //if the user has not given a path then, add inputdir. else leave path alone.
47 if (path == "") { parameters["list"] = inputDir + it->second; }
50 it = parameters.find("group");
51 //user has given a template file
52 if(it != parameters.end()){
53 path = hasPath(it->second);
54 //if the user has not given a path then, add inputdir. else leave path alone.
55 if (path == "") { parameters["group"] = inputDir + it->second; }
58 it = parameters.find("fasta");
59 //user has given a template file
60 if(it != parameters.end()){
61 path = hasPath(it->second);
62 //if the user has not given a path then, add inputdir. else leave path alone.
63 if (path == "") { parameters["fasta"] = inputDir + it->second; }
66 it = parameters.find("name");
67 //user has given a template file
68 if(it != parameters.end()){
69 path = hasPath(it->second);
70 //if the user has not given a path then, add inputdir. else leave path alone.
71 if (path == "") { parameters["name"] = inputDir + it->second; }
77 //if the user changes the output directory command factory will send this info to us in the output parameter
78 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
80 //check for required parameters
81 listfile = validParameter.validFile(parameters, "list", true);
82 if (listfile == "not open") { abort = true; }
83 else if (listfile == "not found") { listfile = ""; }
84 else{ inputFile = listfile; }
86 namefile = validParameter.validFile(parameters, "name", true);
87 if (namefile == "not open") { abort = true; }
88 else if (namefile == "not found") { namefile = ""; }
89 else{ inputFile = namefile; }
91 fastafile = validParameter.validFile(parameters, "fasta", true);
92 if (fastafile == "not open") { abort = true; }
93 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
95 groupfile = validParameter.validFile(parameters, "group", true);
96 if (groupfile == "not open") { groupfile = ""; abort = true; }
97 else if (groupfile == "not found") { groupfile = ""; }
99 groupMap = new GroupMap(groupfile);
101 int error = groupMap->readMap();
102 if (error == 1) { abort = true; }
106 groups = validParameter.validFile(parameters, "groups", false);
107 if (groups == "not found") { groups = ""; }
108 else if (groups == "all") {
109 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
110 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
112 splitAtDash(groups, Groups);
115 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
117 //do you have all files needed
118 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
120 //check for optional parameter and set defaults
121 // ...at some point should added some additional type checking...
122 label = validParameter.validFile(parameters, "label", false);
123 if (label == "not found") { label = ""; allLines = 1; }
125 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
126 else { allLines = 1; }
129 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
130 accnos = isTrue(temp);
132 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
133 convert(temp, cutoff);
135 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
140 catch(exception& e) {
141 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
148 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label, groups and accnos.\n");
150 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
156 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
157 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
158 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
163 catch(exception& e) {
164 m->errorOut(e, "SplitAbundCommand", "help");
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){
170 if (groupfile != "") { delete groupMap; }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
176 if (abort == true) { return 0; }
178 if (listfile != "") { //you are using a listfile to determine abundance
180 //remove old files so you can append later....
181 string fileroot = outputDir + getRootName(getSimpleName(listfile));
182 if (Groups.size() == 0) {
183 remove((fileroot + "rare.list").c_str());
184 remove((fileroot + "abund.list").c_str());
186 wroteListFile["rare"] = false;
187 wroteListFile["abund"] = false;
189 for (int i=0; i<Groups.size(); i++) {
190 remove((fileroot + Groups[i] + ".rare.list").c_str());
191 remove((fileroot + Groups[i] + ".abund.list").c_str());
193 wroteListFile[(Groups[i] + ".rare")] = false;
194 wroteListFile[(Groups[i] + ".abund")] = false;
198 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
199 set<string> processedLabels;
200 set<string> userLabels = labels;
202 input = new InputData(listfile, "list");
203 list = input->getListVector();
204 string lastLabel = list->getLabel();
206 //do you have a namefile or do we need to similate one?
207 if (namefile != "") { readNamesFile(); }
208 else { createNameMap(list); }
210 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
212 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
214 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
216 if(allLines == 1 || labels.count(list->getLabel()) == 1){
218 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
221 processedLabels.insert(list->getLabel());
222 userLabels.erase(list->getLabel());
225 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
226 string saveLabel = list->getLabel();
229 list = input->getListVector(lastLabel); //get new list vector to process
231 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
234 processedLabels.insert(list->getLabel());
235 userLabels.erase(list->getLabel());
237 //restore real lastlabel to save below
238 list->setLabel(saveLabel);
242 lastLabel = list->getLabel();
245 list = input->getListVector(); //get new list vector to process
248 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
250 //output error messages about any remaining user labels
251 set<string>::iterator it;
252 bool needToRun = false;
253 for (it = userLabels.begin(); it != userLabels.end(); it++) {
254 m->mothurOut("Your file does not include the label " + *it);
255 if (processedLabels.count(lastLabel) != 1) {
256 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
259 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
264 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
266 //run last label if you need to
267 if (needToRun == true) {
268 if (list != NULL) { delete list; }
269 list = input->getListVector(lastLabel); //get new list vector to process
271 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
279 for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
280 string filename = fileroot + itBool->first;
281 if (itBool->second) { //we wrote to this file
282 outputNames.push_back(filename);
284 remove(filename.c_str());
288 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
291 }else { //you are using the namefile to determine abundance
297 if (groupfile != "") { parseGroup(tag); }
298 if (accnos) { writeAccnos(tag); }
299 if (fastafile != "") { parseFasta(tag); }
302 m->mothurOutEndLine();
303 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
304 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
305 m->mothurOutEndLine();
309 catch(exception& e) {
310 m->errorOut(e, "SplitAbundCommand", "execute");
314 /**********************************************************************************************************************/
315 int SplitAbundCommand::splitList(ListVector* thisList) {
320 //get rareNames and abundNames
321 for (int i = 0; i < thisList->getNumBins(); i++) {
322 if (m->control_pressed) { return 0; }
324 string bin = thisList->get(i);
326 vector<string> names;
327 splitAtComma(bin, names); //parses bin into individual sequence names
328 int size = names.size();
330 if (size <= cutoff) {
331 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
333 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
339 string tag = thisList->getLabel() + ".";
340 if (groupfile != "") { parseGroup(tag); }
341 if (accnos) { writeAccnos(tag); }
342 if (fastafile != "") { parseFasta(tag); }
347 catch(exception& e) {
348 m->errorOut(e, "SplitAbundCommand", "splitList");
352 /**********************************************************************************************************************/
353 int SplitAbundCommand::writeList(ListVector* thisList) {
356 map<string, ofstream*> filehandles;
358 if (Groups.size() == 0) {
359 SAbundVector* sabund = new SAbundVector();
360 *sabund = thisList->getSAbundVector();
362 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
363 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
365 for (int i = 0; i <= sabund->getMaxRank(); i++) {
366 if (i > cutoff) { break; }
367 numRareBins += sabund->get(i);
369 int numAbundBins = thisList->getNumBins() - numRareBins;
375 if (rareNames.size() != 0) {
376 string rare = outputDir + getRootName(getSimpleName(listfile)) + ".rare.list";
377 wroteListFile["rare"] = true;
378 openOutputFileAppend(rare, rout);
379 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
382 if (abundNames.size() != 0) {
383 string abund = outputDir + getRootName(getSimpleName(listfile)) + ".abund.list";
384 wroteListFile["abund"] = true;
385 openOutputFileAppend(abund, aout);
386 rout << thisList->getLabel() << '\t' << numAbundBins << '\t';
389 for (int i = 0; i < thisList->getNumBins(); i++) {
390 if (m->control_pressed) { break; }
392 string bin = list->get(i);
394 int size = getNumNames(bin);
396 if (size <= cutoff) { rout << bin << '\t'; }
397 else { aout << bin << '\t'; }
400 if (rareNames.size() != 0) { rout << endl; rout.close(); }
401 if (abundNames.size() != 0) { aout << endl; aout.close(); }
403 }else{ //parse names by abundance and group
404 string fileroot = outputDir + getRootName(getSimpleName(listfile));
407 map<string, bool> wroteFile;
408 map<string, ofstream*> filehandles;
409 map<string, ofstream*>::iterator it3;
411 for (int i=0; i<Groups.size(); i++) {
413 filehandles[Groups[i]+".rare"] = temp;
414 temp2 = new ofstream;
415 filehandles[Groups[i]+".abund"] = temp2;
417 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
418 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
421 map<string, string> groupVector;
422 map<string, string>::iterator itGroup;
423 map<string, int> groupNumBins;
425 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
426 groupNumBins[it3->first] = 0;
427 groupVector[it3->first] = "";
430 for (int i = 0; i < thisList->getNumBins(); i++) {
431 if (m->control_pressed) { break; }
433 map<string, string> groupBins;
434 string bin = list->get(i);
436 vector<string> names;
437 splitAtComma(bin, names); //parses bin into individual sequence names
439 //parse bin into list of sequences in each group
440 for (int j = 0; j < names.size(); j++) {
442 if (rareNames.count(names[j]) != 0) { //you are a rare name
444 }else{ //you are a abund name
445 rareAbund = ".abund";
448 string group = groupMap->getGroup(names[j]);
450 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
451 itGroup = groupBins.find(group+rareAbund);
452 if(itGroup == groupBins.end()) {
453 groupBins[group+rareAbund] = names[j]; //add first name
454 groupNumBins[group+rareAbund]++;
455 }else{ //add another name
456 groupBins[group+rareAbund] += "," + names[j];
458 }else if(group == "not found") {
459 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
464 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
465 groupVector[itGroup->first] += itGroup->second + '\t';
470 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
471 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
472 wroteListFile[it3->first] = true;
473 (*(filehandles[it3->first])).close();
481 catch(exception& e) {
482 m->errorOut(e, "SplitAbundCommand", "writeList");
486 /**********************************************************************************************************************/
487 int SplitAbundCommand::splitNames() { //namefile
495 openInputFile(namefile, in);
498 if (m->control_pressed) { break; }
500 string firstCol, secondCol;
501 in >> firstCol >> secondCol; gobble(in);
503 nameMap[firstCol] = secondCol;
505 int size = getNumNames(secondCol);
507 if (size <= cutoff) {
508 rareNames.insert(firstCol);
510 abundNames.insert(firstCol);
518 catch(exception& e) {
519 m->errorOut(e, "SplitAbundCommand", "splitNames");
523 /**********************************************************************************************************************/
524 int SplitAbundCommand::readNamesFile() {
528 openInputFile(namefile, in);
531 if (m->control_pressed) { break; }
533 string firstCol, secondCol;
534 in >> firstCol >> secondCol; gobble(in);
536 nameMap[firstCol] = secondCol;
543 catch(exception& e) {
544 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
548 /**********************************************************************************************************************/
549 int SplitAbundCommand::createNameMap(ListVector* thisList) {
552 if (thisList != NULL) {
553 for (int i = 0; i < thisList->getNumBins(); i++) {
554 if (m->control_pressed) { return 0; }
556 string bin = thisList->get(i);
558 vector<string> names;
559 splitAtComma(bin, names); //parses bin into individual sequence names
561 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
567 catch(exception& e) {
568 m->errorOut(e, "SplitAbundCommand", "createNameMap");
572 /**********************************************************************************************************************/
573 int SplitAbundCommand::writeNames() { //namefile
576 map<string, ofstream*> filehandles;
578 if (Groups.size() == 0) {
582 if (rareNames.size() != 0) {
583 string rare = outputDir + getRootName(getSimpleName(namefile)) + "rare.names";
584 openOutputFile(rare, rout);
585 outputNames.push_back(rare);
587 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
588 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
593 if (abundNames.size() != 0) {
594 string abund = outputDir + getRootName(getSimpleName(namefile)) + "abund.names";
595 openOutputFile(abund, aout);
596 outputNames.push_back(abund);
598 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
599 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
604 }else{ //parse names by abundance and group
605 string fileroot = outputDir + getRootName(getSimpleName(namefile));
608 map<string, bool> wroteFile;
609 map<string, ofstream*> filehandles;
610 map<string, ofstream*>::iterator it3;
612 for (int i=0; i<Groups.size(); i++) {
614 filehandles[Groups[i]+".rare"] = temp;
615 temp2 = new ofstream;
616 filehandles[Groups[i]+".abund"] = temp2;
618 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
619 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
621 wroteFile[Groups[i] + ".rare"] = false;
622 wroteFile[Groups[i] + ".abund"] = false;
625 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
626 vector<string> names;
627 splitAtComma(itName->second, names); //parses bin into individual sequence names
630 if (rareNames.count(itName->first) != 0) { //you are a rare name
632 }else{ //you are a abund name
633 rareAbund = ".abund";
636 map<string, string> outputStrings;
637 map<string, string>::iterator itout;
638 for (int i = 0; i < names.size(); i++) {
640 string group = groupMap->getGroup(names[i]);
642 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
643 itout = outputStrings.find(group+rareAbund);
644 if (itout == outputStrings.end()) {
645 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
646 }else { outputStrings[group+rareAbund] += "," + names[i]; }
647 }else if(group == "not found") {
648 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
652 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) {
653 *(filehandles[itout->first]) << itout->second << endl;
654 wroteFile[itout->first] = true;
659 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
660 (*(filehandles[it3->first])).close();
661 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + it3->first + ".names"); }
662 else { remove((it3->first).c_str()); }
670 catch(exception& e) {
671 m->errorOut(e, "SplitAbundCommand", "writeNames");
675 /**********************************************************************************************************************/
676 //just write the unique names - if a namesfile is given
677 int SplitAbundCommand::writeAccnos(string tag) {
680 map<string, ofstream*> filehandles;
682 if (Groups.size() == 0) {
686 if (rareNames.size() != 0) {
687 string rare = outputDir + getRootName(getSimpleName(inputFile)) + tag + "rare.accnos";
688 openOutputFile(rare, rout);
689 outputNames.push_back(rare);
691 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
692 rout << (*itRare) << endl;
697 if (abundNames.size() != 0) {
698 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag + "abund.accnos";
699 openOutputFile(abund, aout);
700 outputNames.push_back(abund);
702 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
703 aout << (*itAbund) << endl;
707 }else{ //parse names by abundance and group
708 string fileroot = outputDir + getRootName(getSimpleName(inputFile));
711 map<string, bool> wroteFile;
712 map<string, ofstream*> filehandles;
713 map<string, ofstream*>::iterator it3;
715 for (int i=0; i<Groups.size(); i++) {
717 filehandles[Groups[i]+".rare"] = temp;
718 temp2 = new ofstream;
719 filehandles[Groups[i]+".abund"] = temp2;
721 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
722 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
724 wroteFile[Groups[i] + ".rare"] = false;
725 wroteFile[Groups[i] + ".abund"] = false;
729 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
730 string group = groupMap->getGroup(*itRare);
732 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
733 *(filehandles[group+".rare"]) << *itRare << endl;
734 wroteFile[group+".rare"] = true;
739 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
740 string group = groupMap->getGroup(*itAbund);
742 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
743 *(filehandles[group+".abund"]) << *itAbund << endl;
744 wroteFile[group+".abund"] = true;
749 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
750 (*(filehandles[it3->first])).close();
751 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".accnos"); }
752 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
760 catch(exception& e) {
761 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
765 /**********************************************************************************************************************/
766 int SplitAbundCommand::parseGroup(string tag) { //namefile
769 map<string, ofstream*> filehandles;
771 if (Groups.size() == 0) {
775 if (rareNames.size() != 0) {
776 string rare = outputDir + getRootName(getSimpleName(groupfile)) + tag + "rare.groups";
777 openOutputFile(rare, rout);
778 outputNames.push_back(rare);
781 if (abundNames.size() != 0) {
782 string abund = outputDir + getRootName(getSimpleName(groupfile)) + tag + "abund.groups";
783 openOutputFile(abund, aout);
784 outputNames.push_back(abund);
788 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
789 vector<string> names;
790 splitAtComma(itName->second, names); //parses bin into individual sequence names
792 for (int i = 0; i < names.size(); i++) {
794 string group = groupMap->getGroup(names[i]);
796 if (group == "not found") {
797 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
799 if (rareNames.count(itName->first) != 0) { //you are a rare name
800 rout << names[i] << '\t' << group << endl;
801 }else{ //you are a abund name
802 rout << names[i] << '\t' << group << endl;
808 if (rareNames.size() != 0) { rout.close(); }
809 if (abundNames.size() != 0) { aout.close(); }
811 }else{ //parse names by abundance and group
812 string fileroot = outputDir + getRootName(getSimpleName(groupfile));
815 map<string, bool> wroteFile;
816 map<string, ofstream*> filehandles;
817 map<string, ofstream*>::iterator it3;
819 for (int i=0; i<Groups.size(); i++) {
821 filehandles[Groups[i]+".rare"] = temp;
822 temp2 = new ofstream;
823 filehandles[Groups[i]+".abund"] = temp2;
825 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
826 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
828 wroteFile[Groups[i] + ".rare"] = false;
829 wroteFile[Groups[i] + ".abund"] = false;
832 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
833 vector<string> names;
834 splitAtComma(itName->second, names); //parses bin into individual sequence names
837 if (rareNames.count(itName->first) != 0) { //you are a rare name
839 }else{ //you are a abund name
840 rareAbund = ".abund";
843 for (int i = 0; i < names.size(); i++) {
845 string group = groupMap->getGroup(names[i]);
847 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
848 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
849 wroteFile[group+rareAbund] = true;
854 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
855 (*(filehandles[it3->first])).close();
856 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".groups"); }
857 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
865 catch(exception& e) {
866 m->errorOut(e, "SplitAbundCommand", "parseGroups");
870 /**********************************************************************************************************************/
871 int SplitAbundCommand::parseFasta(string tag) { //namefile
874 map<string, ofstream*> filehandles;
876 if (Groups.size() == 0) {
880 if (rareNames.size() != 0) {
881 string rare = outputDir + getRootName(getSimpleName(fastafile)) + tag + "rare.fasta";
882 openOutputFile(rare, rout);
883 outputNames.push_back(rare);
886 if (abundNames.size() != 0) {
887 string abund = outputDir + getRootName(getSimpleName(fastafile)) + tag + "abund.fasta";
888 openOutputFile(abund, aout);
889 outputNames.push_back(abund);
895 openInputFile(fastafile, in);
898 if (m->control_pressed) { break; }
900 Sequence seq(in); gobble(in);
902 if (seq.getName() != "") {
904 map<string, string>::iterator itNames;
906 itNames = nameMap.find(seq.getName());
908 if (itNames == nameMap.end()) {
909 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
911 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
912 seq.printSequence(rout);
913 }else{ //you are a abund name
914 seq.printSequence(aout);
920 if (rareNames.size() != 0) { rout.close(); }
921 if (abundNames.size() != 0) { aout.close(); }
923 }else{ //parse names by abundance and group
924 string fileroot = outputDir + getRootName(getSimpleName(fastafile));
927 map<string, bool> wroteFile;
928 map<string, ofstream*> filehandles;
929 map<string, ofstream*>::iterator it3;
931 for (int i=0; i<Groups.size(); i++) {
933 filehandles[Groups[i]+".rare"] = temp;
934 temp2 = new ofstream;
935 filehandles[Groups[i]+".abund"] = temp2;
937 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
938 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
940 wroteFile[Groups[i] + ".rare"] = false;
941 wroteFile[Groups[i] + ".abund"] = false;
946 openInputFile(fastafile, in);
949 if (m->control_pressed) { break; }
951 Sequence seq(in); gobble(in);
953 if (seq.getName() != "") {
954 map<string, string>::iterator itNames = nameMap.find(seq.getName());
956 if (itNames == nameMap.end()) {
957 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
959 vector<string> names;
960 splitAtComma(itNames->second, names); //parses bin into individual sequence names
963 if (rareNames.count(itNames->first) != 0) { //you are a rare name
965 }else{ //you are a abund name
966 rareAbund = ".abund";
969 for (int i = 0; i < names.size(); i++) {
971 string group = groupMap->getGroup(seq.getName());
973 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
974 seq.printSequence(*(filehandles[group+rareAbund]));
975 wroteFile[group+rareAbund] = true;
976 }else if(group == "not found") {
977 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
985 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
986 (*(filehandles[it3->first])).close();
987 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".fasta"); }
988 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
996 catch(exception& e) {
997 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1001 /**********************************************************************************************************************/