2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option) {
18 //allow user to run help
19 if(option == "help") { help(); abort = true; }
22 //valid paramters for this command
23 string Array[] = {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
24 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
26 OptionParser parser(option);
27 map<string, string> parameters = parser.getParameters();
29 ValidParameters validParameter;
30 map<string, string>::iterator it;
32 //check to make sure all parameters are valid for command
33 for (it = parameters.begin(); it != parameters.end(); it++) {
34 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
37 //if the user changes the input directory command factory will send this info to us in the output parameter
38 string inputDir = validParameter.validFile(parameters, "inputdir", false);
39 if (inputDir == "not found"){ inputDir = ""; }
42 it = parameters.find("list");
43 //user has given a template file
44 if(it != parameters.end()){
45 path = hasPath(it->second);
46 //if the user has not given a path then, add inputdir. else leave path alone.
47 if (path == "") { parameters["list"] = inputDir + it->second; }
50 it = parameters.find("group");
51 //user has given a template file
52 if(it != parameters.end()){
53 path = hasPath(it->second);
54 //if the user has not given a path then, add inputdir. else leave path alone.
55 if (path == "") { parameters["group"] = inputDir + it->second; }
58 it = parameters.find("fasta");
59 //user has given a template file
60 if(it != parameters.end()){
61 path = hasPath(it->second);
62 //if the user has not given a path then, add inputdir. else leave path alone.
63 if (path == "") { parameters["fasta"] = inputDir + it->second; }
66 it = parameters.find("name");
67 //user has given a template file
68 if(it != parameters.end()){
69 path = hasPath(it->second);
70 //if the user has not given a path then, add inputdir. else leave path alone.
71 if (path == "") { parameters["name"] = inputDir + it->second; }
77 //if the user changes the output directory command factory will send this info to us in the output parameter
78 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
80 //check for required parameters
81 listfile = validParameter.validFile(parameters, "list", true);
82 if (listfile == "not open") { abort = true; }
83 else if (listfile == "not found") { listfile = ""; }
84 else{ inputFile = listfile; }
86 namefile = validParameter.validFile(parameters, "name", true);
87 if (namefile == "not open") { abort = true; }
88 else if (namefile == "not found") { namefile = ""; }
89 else{ inputFile = namefile; }
91 fastafile = validParameter.validFile(parameters, "fasta", true);
92 if (fastafile == "not open") { abort = true; }
93 else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
95 groupfile = validParameter.validFile(parameters, "group", true);
96 if (groupfile == "not open") { groupfile = ""; abort = true; }
97 else if (groupfile == "not found") { groupfile = ""; }
99 groupMap = new GroupMap(groupfile);
101 int error = groupMap->readMap();
102 if (error == 1) { abort = true; }
106 groups = validParameter.validFile(parameters, "groups", false);
107 if (groups == "not found") { groups = ""; }
108 else if (groups == "all") {
109 if (groupfile != "") { Groups = groupMap->namesOfGroups; }
110 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
112 splitAtDash(groups, Groups);
115 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
117 //do you have all files needed
118 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
120 //check for optional parameter and set defaults
121 // ...at some point should added some additional type checking...
122 label = validParameter.validFile(parameters, "label", false);
123 if (label == "not found") { label = ""; allLines = 1; }
125 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
126 else { allLines = 1; }
129 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
130 accnos = isTrue(temp);
132 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
133 convert(temp, cutoff);
135 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
140 catch(exception& e) {
141 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
145 //**********************************************************************************************************************
146 void SplitAbundCommand::help(){
148 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
149 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
150 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
151 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
152 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
153 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
154 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
155 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group. \n");
156 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n");
157 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all. \n");
158 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
159 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
160 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
163 catch(exception& e) {
164 m->errorOut(e, "SplitAbundCommand", "help");
168 //**********************************************************************************************************************
169 SplitAbundCommand::~SplitAbundCommand(){
170 if (groupfile != "") { delete groupMap; }
172 //**********************************************************************************************************************
173 int SplitAbundCommand::execute(){
176 if (abort == true) { return 0; }
178 if (listfile != "") { //you are using a listfile to determine abundance
179 if (outputDir == "") { outputDir = hasPath(listfile); }
181 //remove old files so you can append later....
182 string fileroot = outputDir + getRootName(getSimpleName(listfile));
183 if (Groups.size() == 0) {
184 remove((fileroot + "rare.list").c_str());
185 remove((fileroot + "abund.list").c_str());
187 wroteListFile["rare"] = false;
188 wroteListFile["abund"] = false;
190 for (int i=0; i<Groups.size(); i++) {
191 remove((fileroot + Groups[i] + ".rare.list").c_str());
192 remove((fileroot + Groups[i] + ".abund.list").c_str());
194 wroteListFile[(Groups[i] + ".rare")] = false;
195 wroteListFile[(Groups[i] + ".abund")] = false;
199 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
200 set<string> processedLabels;
201 set<string> userLabels = labels;
203 input = new InputData(listfile, "list");
204 list = input->getListVector();
205 string lastLabel = list->getLabel();
207 //do you have a namefile or do we need to similate one?
208 if (namefile != "") { readNamesFile(); }
209 else { createNameMap(list); }
211 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
213 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
215 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
217 if(allLines == 1 || labels.count(list->getLabel()) == 1){
219 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
222 processedLabels.insert(list->getLabel());
223 userLabels.erase(list->getLabel());
226 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
227 string saveLabel = list->getLabel();
230 list = input->getListVector(lastLabel); //get new list vector to process
232 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
235 processedLabels.insert(list->getLabel());
236 userLabels.erase(list->getLabel());
238 //restore real lastlabel to save below
239 list->setLabel(saveLabel);
243 lastLabel = list->getLabel();
246 list = input->getListVector(); //get new list vector to process
249 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
251 //output error messages about any remaining user labels
252 set<string>::iterator it;
253 bool needToRun = false;
254 for (it = userLabels.begin(); it != userLabels.end(); it++) {
255 m->mothurOut("Your file does not include the label " + *it);
256 if (processedLabels.count(lastLabel) != 1) {
257 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
260 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
265 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
267 //run last label if you need to
268 if (needToRun == true) {
269 if (list != NULL) { delete list; }
270 list = input->getListVector(lastLabel); //get new list vector to process
272 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
280 for (map<string, bool>::iterator itBool = wroteListFile.begin(); itBool != wroteListFile.end(); itBool++) {
281 string filename = fileroot + itBool->first;
282 if ((itBool->first == "rare") || (itBool->first == "abund")) {
283 filename = fileroot + itBool->first + ".list";
285 if (itBool->second) { //we wrote to this file
286 outputNames.push_back(filename);
288 remove(filename.c_str());
292 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
295 }else { //you are using the namefile to determine abundance
296 if (outputDir == "") { outputDir = hasPath(namefile); }
302 if (groupfile != "") { parseGroup(tag); }
303 if (accnos) { writeAccnos(tag); }
304 if (fastafile != "") { parseFasta(tag); }
307 m->mothurOutEndLine();
308 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
309 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
310 m->mothurOutEndLine();
314 catch(exception& e) {
315 m->errorOut(e, "SplitAbundCommand", "execute");
319 /**********************************************************************************************************************/
320 int SplitAbundCommand::splitList(ListVector* thisList) {
325 //get rareNames and abundNames
326 for (int i = 0; i < thisList->getNumBins(); i++) {
327 if (m->control_pressed) { return 0; }
329 string bin = thisList->get(i);
331 vector<string> names;
332 splitAtComma(bin, names); //parses bin into individual sequence names
333 int size = names.size();
335 if (size <= cutoff) {
336 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
338 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
344 string tag = thisList->getLabel() + ".";
345 if (groupfile != "") { parseGroup(tag); }
346 if (accnos) { writeAccnos(tag); }
347 if (fastafile != "") { parseFasta(tag); }
352 catch(exception& e) {
353 m->errorOut(e, "SplitAbundCommand", "splitList");
357 /**********************************************************************************************************************/
358 int SplitAbundCommand::writeList(ListVector* thisList) {
361 map<string, ofstream*> filehandles;
363 if (Groups.size() == 0) {
364 SAbundVector* sabund = new SAbundVector();
365 *sabund = thisList->getSAbundVector();
367 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
368 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
370 for (int i = 0; i <= sabund->getMaxRank(); i++) {
371 if (i > cutoff) { break; }
372 numRareBins += sabund->get(i);
374 int numAbundBins = thisList->getNumBins() - numRareBins;
380 if (rareNames.size() != 0) {
381 string rare = outputDir + getRootName(getSimpleName(listfile)) + "rare.list";
382 wroteListFile["rare"] = true;
383 openOutputFileAppend(rare, rout);
384 rout << thisList->getLabel() << '\t' << numRareBins << '\t';
387 if (abundNames.size() != 0) {
388 string abund = outputDir + getRootName(getSimpleName(listfile)) + "abund.list";
389 wroteListFile["abund"] = true;
390 openOutputFileAppend(abund, aout);
391 aout << thisList->getLabel() << '\t' << numAbundBins << '\t';
394 for (int i = 0; i < thisList->getNumBins(); i++) {
395 if (m->control_pressed) { break; }
397 string bin = list->get(i);
399 int size = getNumNames(bin);
401 if (size <= cutoff) { rout << bin << '\t'; }
402 else { aout << bin << '\t'; }
405 if (rareNames.size() != 0) { rout << endl; rout.close(); }
406 if (abundNames.size() != 0) { aout << endl; aout.close(); }
408 }else{ //parse names by abundance and group
409 string fileroot = outputDir + getRootName(getSimpleName(listfile));
412 map<string, bool> wroteFile;
413 map<string, ofstream*> filehandles;
414 map<string, ofstream*>::iterator it3;
416 for (int i=0; i<Groups.size(); i++) {
418 filehandles[Groups[i]+".rare"] = temp;
419 temp2 = new ofstream;
420 filehandles[Groups[i]+".abund"] = temp2;
422 openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
423 openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
426 map<string, string> groupVector;
427 map<string, string>::iterator itGroup;
428 map<string, int> groupNumBins;
430 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
431 groupNumBins[it3->first] = 0;
432 groupVector[it3->first] = "";
435 for (int i = 0; i < thisList->getNumBins(); i++) {
436 if (m->control_pressed) { break; }
438 map<string, string> groupBins;
439 string bin = list->get(i);
441 vector<string> names;
442 splitAtComma(bin, names); //parses bin into individual sequence names
444 //parse bin into list of sequences in each group
445 for (int j = 0; j < names.size(); j++) {
447 if (rareNames.count(names[j]) != 0) { //you are a rare name
449 }else{ //you are a abund name
450 rareAbund = ".abund";
453 string group = groupMap->getGroup(names[j]);
455 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
456 itGroup = groupBins.find(group+rareAbund);
457 if(itGroup == groupBins.end()) {
458 groupBins[group+rareAbund] = names[j]; //add first name
459 groupNumBins[group+rareAbund]++;
460 }else{ //add another name
461 groupBins[group+rareAbund] += "," + names[j];
463 }else if(group == "not found") {
464 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
469 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
470 groupVector[itGroup->first] += itGroup->second + '\t';
475 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
476 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
477 wroteListFile[it3->first] = true;
478 (*(filehandles[it3->first])).close();
486 catch(exception& e) {
487 m->errorOut(e, "SplitAbundCommand", "writeList");
491 /**********************************************************************************************************************/
492 int SplitAbundCommand::splitNames() { //namefile
500 openInputFile(namefile, in);
503 if (m->control_pressed) { break; }
505 string firstCol, secondCol;
506 in >> firstCol >> secondCol; gobble(in);
508 nameMap[firstCol] = secondCol;
510 int size = getNumNames(secondCol);
512 if (size <= cutoff) {
513 rareNames.insert(firstCol);
515 abundNames.insert(firstCol);
523 catch(exception& e) {
524 m->errorOut(e, "SplitAbundCommand", "splitNames");
528 /**********************************************************************************************************************/
529 int SplitAbundCommand::readNamesFile() {
533 openInputFile(namefile, in);
536 if (m->control_pressed) { break; }
538 string firstCol, secondCol;
539 in >> firstCol >> secondCol; gobble(in);
541 nameMap[firstCol] = secondCol;
548 catch(exception& e) {
549 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
553 /**********************************************************************************************************************/
554 int SplitAbundCommand::createNameMap(ListVector* thisList) {
557 if (thisList != NULL) {
558 for (int i = 0; i < thisList->getNumBins(); i++) {
559 if (m->control_pressed) { return 0; }
561 string bin = thisList->get(i);
563 vector<string> names;
564 splitAtComma(bin, names); //parses bin into individual sequence names
566 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
572 catch(exception& e) {
573 m->errorOut(e, "SplitAbundCommand", "createNameMap");
577 /**********************************************************************************************************************/
578 int SplitAbundCommand::writeNames() { //namefile
581 map<string, ofstream*> filehandles;
583 if (Groups.size() == 0) {
587 if (rareNames.size() != 0) {
588 string rare = outputDir + getRootName(getSimpleName(namefile)) + "rare.names";
589 openOutputFile(rare, rout);
590 outputNames.push_back(rare);
592 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
593 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
598 if (abundNames.size() != 0) {
599 string abund = outputDir + getRootName(getSimpleName(namefile)) + "abund.names";
600 openOutputFile(abund, aout);
601 outputNames.push_back(abund);
603 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
604 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
609 }else{ //parse names by abundance and group
610 string fileroot = outputDir + getRootName(getSimpleName(namefile));
613 map<string, bool> wroteFile;
614 map<string, ofstream*> filehandles;
615 map<string, ofstream*>::iterator it3;
617 for (int i=0; i<Groups.size(); i++) {
619 filehandles[Groups[i]+".rare"] = temp;
620 temp2 = new ofstream;
621 filehandles[Groups[i]+".abund"] = temp2;
623 openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
624 openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
626 wroteFile[Groups[i] + ".rare"] = false;
627 wroteFile[Groups[i] + ".abund"] = false;
630 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
631 vector<string> names;
632 splitAtComma(itName->second, names); //parses bin into individual sequence names
635 if (rareNames.count(itName->first) != 0) { //you are a rare name
637 }else{ //you are a abund name
638 rareAbund = ".abund";
641 map<string, string> outputStrings;
642 map<string, string>::iterator itout;
643 for (int i = 0; i < names.size(); i++) {
645 string group = groupMap->getGroup(names[i]);
647 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
648 itout = outputStrings.find(group+rareAbund);
649 if (itout == outputStrings.end()) {
650 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
651 }else { outputStrings[group+rareAbund] += "," + names[i]; }
652 }else if(group == "not found") {
653 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
657 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) {
658 *(filehandles[itout->first]) << itout->second << endl;
659 wroteFile[itout->first] = true;
664 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
665 (*(filehandles[it3->first])).close();
666 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + it3->first + ".names"); }
667 else { remove((it3->first).c_str()); }
675 catch(exception& e) {
676 m->errorOut(e, "SplitAbundCommand", "writeNames");
680 /**********************************************************************************************************************/
681 //just write the unique names - if a namesfile is given
682 int SplitAbundCommand::writeAccnos(string tag) {
685 map<string, ofstream*> filehandles;
687 if (Groups.size() == 0) {
691 if (rareNames.size() != 0) {
692 string rare = outputDir + getRootName(getSimpleName(inputFile)) + tag + "rare.accnos";
693 openOutputFile(rare, rout);
694 outputNames.push_back(rare);
696 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
697 rout << (*itRare) << endl;
702 if (abundNames.size() != 0) {
703 string abund = outputDir + getRootName(getSimpleName(inputFile)) + tag + "abund.accnos";
704 openOutputFile(abund, aout);
705 outputNames.push_back(abund);
707 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
708 aout << (*itAbund) << endl;
712 }else{ //parse names by abundance and group
713 string fileroot = outputDir + getRootName(getSimpleName(inputFile));
716 map<string, bool> wroteFile;
717 map<string, ofstream*> filehandles;
718 map<string, ofstream*>::iterator it3;
720 for (int i=0; i<Groups.size(); i++) {
722 filehandles[Groups[i]+".rare"] = temp;
723 temp2 = new ofstream;
724 filehandles[Groups[i]+".abund"] = temp2;
726 openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
727 openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
729 wroteFile[Groups[i] + ".rare"] = false;
730 wroteFile[Groups[i] + ".abund"] = false;
734 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
735 string group = groupMap->getGroup(*itRare);
737 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
738 *(filehandles[group+".rare"]) << *itRare << endl;
739 wroteFile[group+".rare"] = true;
744 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
745 string group = groupMap->getGroup(*itAbund);
747 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
748 *(filehandles[group+".abund"]) << *itAbund << endl;
749 wroteFile[group+".abund"] = true;
754 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
755 (*(filehandles[it3->first])).close();
756 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".accnos"); }
757 else { remove((fileroot + tag + it3->first + ".accnos").c_str()); }
765 catch(exception& e) {
766 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
770 /**********************************************************************************************************************/
771 int SplitAbundCommand::parseGroup(string tag) { //namefile
774 map<string, ofstream*> filehandles;
776 if (Groups.size() == 0) {
780 if (rareNames.size() != 0) {
781 string rare = outputDir + getRootName(getSimpleName(groupfile)) + tag + "rare.groups";
782 openOutputFile(rare, rout);
783 outputNames.push_back(rare);
786 if (abundNames.size() != 0) {
787 string abund = outputDir + getRootName(getSimpleName(groupfile)) + tag + "abund.groups";
788 openOutputFile(abund, aout);
789 outputNames.push_back(abund);
793 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
794 vector<string> names;
795 splitAtComma(itName->second, names); //parses bin into individual sequence names
797 for (int i = 0; i < names.size(); i++) {
799 string group = groupMap->getGroup(names[i]);
801 if (group == "not found") {
802 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
804 if (rareNames.count(itName->first) != 0) { //you are a rare name
805 rout << names[i] << '\t' << group << endl;
806 }else{ //you are a abund name
807 aout << names[i] << '\t' << group << endl;
813 if (rareNames.size() != 0) { rout.close(); }
814 if (abundNames.size() != 0) { aout.close(); }
816 }else{ //parse names by abundance and group
817 string fileroot = outputDir + getRootName(getSimpleName(groupfile));
820 map<string, bool> wroteFile;
821 map<string, ofstream*> filehandles;
822 map<string, ofstream*>::iterator it3;
824 for (int i=0; i<Groups.size(); i++) {
826 filehandles[Groups[i]+".rare"] = temp;
827 temp2 = new ofstream;
828 filehandles[Groups[i]+".abund"] = temp2;
830 openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
831 openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
833 wroteFile[Groups[i] + ".rare"] = false;
834 wroteFile[Groups[i] + ".abund"] = false;
837 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
838 vector<string> names;
839 splitAtComma(itName->second, names); //parses bin into individual sequence names
842 if (rareNames.count(itName->first) != 0) { //you are a rare name
844 }else{ //you are a abund name
845 rareAbund = ".abund";
848 for (int i = 0; i < names.size(); i++) {
850 string group = groupMap->getGroup(names[i]);
852 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
853 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
854 wroteFile[group+rareAbund] = true;
859 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
860 (*(filehandles[it3->first])).close();
861 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".groups"); }
862 else { remove((fileroot + tag + it3->first + ".groups").c_str()); }
870 catch(exception& e) {
871 m->errorOut(e, "SplitAbundCommand", "parseGroups");
875 /**********************************************************************************************************************/
876 int SplitAbundCommand::parseFasta(string tag) { //namefile
879 map<string, ofstream*> filehandles;
881 if (Groups.size() == 0) {
885 if (rareNames.size() != 0) {
886 string rare = outputDir + getRootName(getSimpleName(fastafile)) + tag + "rare.fasta";
887 openOutputFile(rare, rout);
888 outputNames.push_back(rare);
891 if (abundNames.size() != 0) {
892 string abund = outputDir + getRootName(getSimpleName(fastafile)) + tag + "abund.fasta";
893 openOutputFile(abund, aout);
894 outputNames.push_back(abund);
900 openInputFile(fastafile, in);
903 if (m->control_pressed) { break; }
905 Sequence seq(in); gobble(in);
907 if (seq.getName() != "") {
909 map<string, string>::iterator itNames;
911 itNames = nameMap.find(seq.getName());
913 if (itNames == nameMap.end()) {
914 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
916 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
917 seq.printSequence(rout);
918 }else{ //you are a abund name
919 seq.printSequence(aout);
925 if (rareNames.size() != 0) { rout.close(); }
926 if (abundNames.size() != 0) { aout.close(); }
928 }else{ //parse names by abundance and group
929 string fileroot = outputDir + getRootName(getSimpleName(fastafile));
932 map<string, bool> wroteFile;
933 map<string, ofstream*> filehandles;
934 map<string, ofstream*>::iterator it3;
936 for (int i=0; i<Groups.size(); i++) {
938 filehandles[Groups[i]+".rare"] = temp;
939 temp2 = new ofstream;
940 filehandles[Groups[i]+".abund"] = temp2;
942 openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
943 openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
945 wroteFile[Groups[i] + ".rare"] = false;
946 wroteFile[Groups[i] + ".abund"] = false;
951 openInputFile(fastafile, in);
954 if (m->control_pressed) { break; }
956 Sequence seq(in); gobble(in);
958 if (seq.getName() != "") {
959 map<string, string>::iterator itNames = nameMap.find(seq.getName());
961 if (itNames == nameMap.end()) {
962 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
964 vector<string> names;
965 splitAtComma(itNames->second, names); //parses bin into individual sequence names
968 if (rareNames.count(itNames->first) != 0) { //you are a rare name
970 }else{ //you are a abund name
971 rareAbund = ".abund";
974 for (int i = 0; i < names.size(); i++) {
976 string group = groupMap->getGroup(seq.getName());
978 if (inUsersGroups(group, Groups)) { //only add if this is in a group we want
979 seq.printSequence(*(filehandles[group+rareAbund]));
980 wroteFile[group+rareAbund] = true;
981 }else if(group == "not found") {
982 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
990 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
991 (*(filehandles[it3->first])).close();
992 if (wroteFile[it3->first] == true) { outputNames.push_back(fileroot + tag + it3->first + ".fasta"); }
993 else { remove((fileroot + tag + it3->first + ".fasta").c_str()); }
1001 catch(exception& e) {
1002 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1006 /**********************************************************************************************************************/