2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::setParameters(){
15 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
17 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
19 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
21 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
23 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
26 vector<string> myArray;
27 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
31 m->errorOut(e, "SplitAbundCommand", "setParameters");
35 //**********************************************************************************************************************
36 string SplitAbundCommand::getHelpString(){
38 string helpString = "";
39 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
40 helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
41 helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
42 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
43 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
44 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
45 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
46 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n";
47 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n";
48 helpString += "If you want .abund and .rare files for all groups, set groups=all. \n";
49 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
50 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
51 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
55 m->errorOut(e, "SplitAbundCommand", "getHelpString");
60 //**********************************************************************************************************************
61 SplitAbundCommand::SplitAbundCommand(){
63 abort = true; calledHelp = true;
65 vector<string> tempOutNames;
66 outputTypes["list"] = tempOutNames;
67 outputTypes["name"] = tempOutNames;
68 outputTypes["accnos"] = tempOutNames;
69 outputTypes["group"] = tempOutNames;
70 outputTypes["fasta"] = tempOutNames;
73 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
77 //**********************************************************************************************************************
78 SplitAbundCommand::SplitAbundCommand(string option) {
80 abort = false; calledHelp = false;
83 //allow user to run help
84 if(option == "help") { help(); abort = true; calledHelp = true; }
85 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
87 vector<string> myArray = setParameters();
89 OptionParser parser(option);
90 map<string, string> parameters = parser.getParameters();
92 ValidParameters validParameter;
93 map<string, string>::iterator it;
95 //check to make sure all parameters are valid for command
96 for (it = parameters.begin(); it != parameters.end(); it++) {
97 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
100 //initialize outputTypes
101 vector<string> tempOutNames;
102 outputTypes["list"] = tempOutNames;
103 outputTypes["name"] = tempOutNames;
104 outputTypes["accnos"] = tempOutNames;
105 outputTypes["group"] = tempOutNames;
106 outputTypes["fasta"] = tempOutNames;
108 //if the user changes the input directory command factory will send this info to us in the output parameter
109 string inputDir = validParameter.validFile(parameters, "inputdir", false);
110 if (inputDir == "not found"){ inputDir = ""; }
113 it = parameters.find("list");
114 //user has given a template file
115 if(it != parameters.end()){
116 path = m->hasPath(it->second);
117 //if the user has not given a path then, add inputdir. else leave path alone.
118 if (path == "") { parameters["list"] = inputDir + it->second; }
121 it = parameters.find("group");
122 //user has given a template file
123 if(it != parameters.end()){
124 path = m->hasPath(it->second);
125 //if the user has not given a path then, add inputdir. else leave path alone.
126 if (path == "") { parameters["group"] = inputDir + it->second; }
129 it = parameters.find("fasta");
130 //user has given a template file
131 if(it != parameters.end()){
132 path = m->hasPath(it->second);
133 //if the user has not given a path then, add inputdir. else leave path alone.
134 if (path == "") { parameters["fasta"] = inputDir + it->second; }
137 it = parameters.find("name");
138 //user has given a template file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["name"] = inputDir + it->second; }
148 //if the user changes the output directory command factory will send this info to us in the output parameter
149 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
151 //check for required parameters
152 listfile = validParameter.validFile(parameters, "list", true);
153 if (listfile == "not open") { abort = true; }
154 else if (listfile == "not found") { listfile = ""; }
155 else{ inputFile = listfile; m->setListFile(listfile); }
157 namefile = validParameter.validFile(parameters, "name", true);
158 if (namefile == "not open") { abort = true; }
159 else if (namefile == "not found") { namefile = ""; }
160 else{ inputFile = namefile; m->setNameFile(namefile); }
162 fastafile = validParameter.validFile(parameters, "fasta", true);
163 if (fastafile == "not open") { abort = true; }
164 else if (fastafile == "not found") {
165 fastafile = m->getFastaFile();
166 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
167 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
168 }else { m->setFastaFile(fastafile); }
170 groupfile = validParameter.validFile(parameters, "group", true);
171 if (groupfile == "not open") { groupfile = ""; abort = true; }
172 else if (groupfile == "not found") { groupfile = ""; }
174 groupMap = new GroupMap(groupfile);
176 int error = groupMap->readMap();
177 if (error == 1) { abort = true; }
178 m->setGroupFile(groupfile);
181 groups = validParameter.validFile(parameters, "groups", false);
182 if (groups == "not found") { groups = ""; }
183 else if (groups == "all") {
184 if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); }
185 else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; }
187 m->splitAtDash(groups, Groups);
190 if ((groupfile == "") && (groups != "")) { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
192 //do you have all files needed
193 if ((listfile == "") && (namefile == "")) {
194 namefile = m->getNameFile();
195 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
197 listfile = m->getListFile();
198 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
199 else { m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
203 //check for optional parameter and set defaults
204 // ...at some point should added some additional type checking...
205 label = validParameter.validFile(parameters, "label", false);
206 if (label == "not found") { label = ""; allLines = 1; }
208 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
209 else { allLines = 1; }
212 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
213 accnos = m->isTrue(temp);
215 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
216 convert(temp, cutoff);
218 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
223 catch(exception& e) {
224 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
228 //**********************************************************************************************************************
229 SplitAbundCommand::~SplitAbundCommand(){
230 if (groupfile != "") { delete groupMap; }
232 //**********************************************************************************************************************
233 int SplitAbundCommand::execute(){
236 if (abort == true) { if (calledHelp) { return 0; } return 2; }
238 if (listfile != "") { //you are using a listfile to determine abundance
239 if (outputDir == "") { outputDir = m->hasPath(listfile); }
241 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
242 set<string> processedLabels;
243 set<string> userLabels = labels;
245 input = new InputData(listfile, "list");
246 list = input->getListVector();
247 string lastLabel = list->getLabel();
249 //do you have a namefile or do we need to similate one?
250 if (namefile != "") { readNamesFile(); }
251 else { createNameMap(list); }
253 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
255 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
257 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
259 if(allLines == 1 || labels.count(list->getLabel()) == 1){
261 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
264 processedLabels.insert(list->getLabel());
265 userLabels.erase(list->getLabel());
268 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
269 string saveLabel = list->getLabel();
272 list = input->getListVector(lastLabel); //get new list vector to process
274 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
277 processedLabels.insert(list->getLabel());
278 userLabels.erase(list->getLabel());
280 //restore real lastlabel to save below
281 list->setLabel(saveLabel);
285 lastLabel = list->getLabel();
288 list = input->getListVector(); //get new list vector to process
291 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
293 //output error messages about any remaining user labels
294 set<string>::iterator it;
295 bool needToRun = false;
296 for (it = userLabels.begin(); it != userLabels.end(); it++) {
297 m->mothurOut("Your file does not include the label " + *it);
298 if (processedLabels.count(lastLabel) != 1) {
299 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
302 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
307 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
309 //run last label if you need to
310 if (needToRun == true) {
311 if (list != NULL) { delete list; }
312 list = input->getListVector(lastLabel); //get new list vector to process
314 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
322 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
324 }else { //you are using the namefile to determine abundance
325 if (outputDir == "") { outputDir = m->hasPath(namefile); }
331 if (groupfile != "") { parseGroup(tag); }
332 if (accnos) { writeAccnos(tag); }
333 if (fastafile != "") { parseFasta(tag); }
336 //set fasta file as new current fastafile
338 itTypes = outputTypes.find("fasta");
339 if (itTypes != outputTypes.end()) {
340 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
343 itTypes = outputTypes.find("name");
344 if (itTypes != outputTypes.end()) {
345 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
348 itTypes = outputTypes.find("group");
349 if (itTypes != outputTypes.end()) {
350 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
353 itTypes = outputTypes.find("list");
354 if (itTypes != outputTypes.end()) {
355 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
358 itTypes = outputTypes.find("accnos");
359 if (itTypes != outputTypes.end()) {
360 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
363 m->mothurOutEndLine();
364 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
365 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
366 m->mothurOutEndLine();
370 catch(exception& e) {
371 m->errorOut(e, "SplitAbundCommand", "execute");
375 /**********************************************************************************************************************/
376 int SplitAbundCommand::splitList(ListVector* thisList) {
381 //get rareNames and abundNames
382 for (int i = 0; i < thisList->getNumBins(); i++) {
383 if (m->control_pressed) { return 0; }
385 string bin = thisList->get(i);
387 vector<string> names;
388 m->splitAtComma(bin, names); //parses bin into individual sequence names
389 int size = names.size();
391 if (size <= cutoff) {
392 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
394 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
399 string tag = thisList->getLabel() + ".";
401 writeList(thisList, tag);
403 if (groupfile != "") { parseGroup(tag); }
404 if (accnos) { writeAccnos(tag); }
405 if (fastafile != "") { parseFasta(tag); }
410 catch(exception& e) {
411 m->errorOut(e, "SplitAbundCommand", "splitList");
415 /**********************************************************************************************************************/
416 int SplitAbundCommand::writeList(ListVector* thisList, string tag) {
419 map<string, ofstream*> filehandles;
421 if (Groups.size() == 0) {
422 SAbundVector* sabund = new SAbundVector();
423 *sabund = thisList->getSAbundVector();
425 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
426 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
428 for (int i = 0; i <= sabund->getMaxRank(); i++) {
429 if (i > cutoff) { break; }
430 numRareBins += sabund->get(i);
432 int numAbundBins = thisList->getNumBins() - numRareBins;
438 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
439 m->openOutputFile(rare, rout);
440 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
442 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
443 m->openOutputFile(abund, aout);
444 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
446 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
447 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
449 for (int i = 0; i < thisList->getNumBins(); i++) {
450 if (m->control_pressed) { break; }
452 string bin = list->get(i);
454 int size = m->getNumNames(bin);
456 if (size <= cutoff) { rout << bin << '\t'; }
457 else { aout << bin << '\t'; }
460 if (rareNames.size() != 0) { rout << endl; }
461 if (abundNames.size() != 0) { aout << endl; }
466 }else{ //parse names by abundance and group
467 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
470 //map<string, bool> wroteFile;
471 map<string, ofstream*> filehandles;
472 map<string, ofstream*>::iterator it3;
474 for (int i=0; i<Groups.size(); i++) {
476 filehandles[Groups[i]+".rare"] = temp;
477 temp2 = new ofstream;
478 filehandles[Groups[i]+".abund"] = temp2;
480 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
481 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
482 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
483 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
486 map<string, string> groupVector;
487 map<string, string>::iterator itGroup;
488 map<string, int> groupNumBins;
490 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
491 groupNumBins[it3->first] = 0;
492 groupVector[it3->first] = "";
495 for (int i = 0; i < thisList->getNumBins(); i++) {
496 if (m->control_pressed) { break; }
498 map<string, string> groupBins;
499 string bin = list->get(i);
501 vector<string> names;
502 m->splitAtComma(bin, names); //parses bin into individual sequence names
504 //parse bin into list of sequences in each group
505 for (int j = 0; j < names.size(); j++) {
507 if (rareNames.count(names[j]) != 0) { //you are a rare name
509 }else{ //you are a abund name
510 rareAbund = ".abund";
513 string group = groupMap->getGroup(names[j]);
515 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
516 itGroup = groupBins.find(group+rareAbund);
517 if(itGroup == groupBins.end()) {
518 groupBins[group+rareAbund] = names[j]; //add first name
519 groupNumBins[group+rareAbund]++;
520 }else{ //add another name
521 groupBins[group+rareAbund] += "," + names[j];
523 }else if(group == "not found") {
524 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
529 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
530 groupVector[itGroup->first] += itGroup->second + '\t';
535 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
536 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
537 (*(filehandles[it3->first])).close();
545 catch(exception& e) {
546 m->errorOut(e, "SplitAbundCommand", "writeList");
550 /**********************************************************************************************************************/
551 int SplitAbundCommand::splitNames() { //namefile
559 m->openInputFile(namefile, in);
562 if (m->control_pressed) { break; }
564 string firstCol, secondCol;
565 in >> firstCol >> secondCol; m->gobble(in);
567 nameMap[firstCol] = secondCol;
569 int size = m->getNumNames(secondCol);
571 if (size <= cutoff) {
572 rareNames.insert(firstCol);
574 abundNames.insert(firstCol);
582 catch(exception& e) {
583 m->errorOut(e, "SplitAbundCommand", "splitNames");
587 /**********************************************************************************************************************/
588 int SplitAbundCommand::readNamesFile() {
592 m->openInputFile(namefile, in);
595 if (m->control_pressed) { break; }
597 string firstCol, secondCol;
598 in >> firstCol >> secondCol; m->gobble(in);
600 nameMap[firstCol] = secondCol;
607 catch(exception& e) {
608 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
612 /**********************************************************************************************************************/
613 int SplitAbundCommand::createNameMap(ListVector* thisList) {
616 if (thisList != NULL) {
617 for (int i = 0; i < thisList->getNumBins(); i++) {
618 if (m->control_pressed) { return 0; }
620 string bin = thisList->get(i);
622 vector<string> names;
623 m->splitAtComma(bin, names); //parses bin into individual sequence names
625 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
631 catch(exception& e) {
632 m->errorOut(e, "SplitAbundCommand", "createNameMap");
636 /**********************************************************************************************************************/
637 int SplitAbundCommand::writeNames() { //namefile
640 map<string, ofstream*> filehandles;
642 if (Groups.size() == 0) {
646 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare.names";
647 m->openOutputFile(rare, rout);
648 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
650 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund.names";
651 m->openOutputFile(abund, aout);
652 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
654 if (rareNames.size() != 0) {
655 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
656 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
661 if (abundNames.size() != 0) {
662 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
663 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
668 }else{ //parse names by abundance and group
669 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
672 map<string, ofstream*> filehandles;
673 map<string, ofstream*>::iterator it3;
675 for (int i=0; i<Groups.size(); i++) {
677 filehandles[Groups[i]+".rare"] = temp;
678 temp2 = new ofstream;
679 filehandles[Groups[i]+".abund"] = temp2;
681 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
682 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
685 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
686 vector<string> names;
687 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
690 if (rareNames.count(itName->first) != 0) { //you are a rare name
692 }else{ //you are a abund name
693 rareAbund = ".abund";
696 map<string, string> outputStrings;
697 map<string, string>::iterator itout;
698 for (int i = 0; i < names.size(); i++) {
700 string group = groupMap->getGroup(names[i]);
702 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
703 itout = outputStrings.find(group+rareAbund);
704 if (itout == outputStrings.end()) {
705 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
706 }else { outputStrings[group+rareAbund] += "," + names[i]; }
707 }else if(group == "not found") {
708 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
712 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
716 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
717 (*(filehandles[it3->first])).close();
718 outputNames.push_back(fileroot + it3->first + ".names"); outputTypes["name"].push_back(fileroot + it3->first + ".names");
726 catch(exception& e) {
727 m->errorOut(e, "SplitAbundCommand", "writeNames");
731 /**********************************************************************************************************************/
732 //just write the unique names - if a namesfile is given
733 int SplitAbundCommand::writeAccnos(string tag) {
736 map<string, ofstream*> filehandles;
738 if (Groups.size() == 0) {
743 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare.accnos";
744 m->openOutputFile(rare, rout);
745 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
747 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
748 rout << (*itRare) << endl;
752 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund.accnos";
753 m->openOutputFile(abund, aout);
754 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
756 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
757 aout << (*itAbund) << endl;
761 }else{ //parse names by abundance and group
762 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
765 map<string, ofstream*> filehandles;
766 map<string, ofstream*>::iterator it3;
768 for (int i=0; i<Groups.size(); i++) {
770 filehandles[Groups[i]+".rare"] = temp;
771 temp2 = new ofstream;
772 filehandles[Groups[i]+".abund"] = temp2;
774 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
775 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
779 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
780 string group = groupMap->getGroup(*itRare);
782 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
783 *(filehandles[group+".rare"]) << *itRare << endl;
788 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
789 string group = groupMap->getGroup(*itAbund);
791 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
792 *(filehandles[group+".abund"]) << *itAbund << endl;
797 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
798 (*(filehandles[it3->first])).close();
799 outputNames.push_back(fileroot + tag + it3->first + ".accnos"); outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
807 catch(exception& e) {
808 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
812 /**********************************************************************************************************************/
813 int SplitAbundCommand::parseGroup(string tag) { //namefile
816 map<string, ofstream*> filehandles;
818 if (Groups.size() == 0) {
822 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare.groups";
823 m->openOutputFile(rare, rout);
824 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
826 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund.groups";
827 m->openOutputFile(abund, aout);
828 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
830 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
831 vector<string> names;
832 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
834 for (int i = 0; i < names.size(); i++) {
836 string group = groupMap->getGroup(names[i]);
838 if (group == "not found") {
839 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
841 if (rareNames.count(itName->first) != 0) { //you are a rare name
842 rout << names[i] << '\t' << group << endl;
843 }else{ //you are a abund name
844 aout << names[i] << '\t' << group << endl;
853 }else{ //parse names by abundance and group
854 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
857 map<string, ofstream*> filehandles;
858 map<string, ofstream*>::iterator it3;
860 for (int i=0; i<Groups.size(); i++) {
862 filehandles[Groups[i]+".rare"] = temp;
863 temp2 = new ofstream;
864 filehandles[Groups[i]+".abund"] = temp2;
866 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
867 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
870 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
871 vector<string> names;
872 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
875 if (rareNames.count(itName->first) != 0) { //you are a rare name
877 }else{ //you are a abund name
878 rareAbund = ".abund";
881 for (int i = 0; i < names.size(); i++) {
883 string group = groupMap->getGroup(names[i]);
885 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
886 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
891 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
892 (*(filehandles[it3->first])).close();
893 outputNames.push_back(fileroot + tag + it3->first + ".groups"); outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
901 catch(exception& e) {
902 m->errorOut(e, "SplitAbundCommand", "parseGroups");
906 /**********************************************************************************************************************/
907 int SplitAbundCommand::parseFasta(string tag) { //namefile
910 map<string, ofstream*> filehandles;
912 if (Groups.size() == 0) {
916 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare.fasta";
917 m->openOutputFile(rare, rout);
918 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
920 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund.fasta";
921 m->openOutputFile(abund, aout);
922 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
926 m->openInputFile(fastafile, in);
929 if (m->control_pressed) { break; }
931 Sequence seq(in); m->gobble(in);
933 if (seq.getName() != "") {
935 map<string, string>::iterator itNames;
937 itNames = nameMap.find(seq.getName());
939 if (itNames == nameMap.end()) {
940 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
942 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
943 seq.printSequence(rout);
944 }else{ //you are a abund name
945 seq.printSequence(aout);
954 }else{ //parse names by abundance and group
955 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
958 map<string, ofstream*> filehandles;
959 map<string, ofstream*>::iterator it3;
961 for (int i=0; i<Groups.size(); i++) {
963 filehandles[Groups[i]+".rare"] = temp;
964 temp2 = new ofstream;
965 filehandles[Groups[i]+".abund"] = temp2;
967 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
968 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
973 m->openInputFile(fastafile, in);
976 if (m->control_pressed) { break; }
978 Sequence seq(in); m->gobble(in);
980 if (seq.getName() != "") {
981 map<string, string>::iterator itNames = nameMap.find(seq.getName());
983 if (itNames == nameMap.end()) {
984 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
986 vector<string> names;
987 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
990 if (rareNames.count(itNames->first) != 0) { //you are a rare name
992 }else{ //you are a abund name
993 rareAbund = ".abund";
996 for (int i = 0; i < names.size(); i++) {
998 string group = groupMap->getGroup(seq.getName());
1000 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1001 seq.printSequence(*(filehandles[group+rareAbund]));
1002 }else if(group == "not found") {
1003 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1011 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1012 (*(filehandles[it3->first])).close();
1013 outputNames.push_back(fileroot + tag + it3->first + ".fasta"); outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1021 catch(exception& e) {
1022 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1026 /**********************************************************************************************************************/