2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
11 #include "sharedutilities.h"
13 //**********************************************************************************************************************
14 vector<string> SplitAbundCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
19 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
20 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
21 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(paccnos);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "SplitAbundCommand", "setParameters");
37 //**********************************************************************************************************************
38 string SplitAbundCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
42 helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
43 helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
44 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
45 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
46 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
47 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
48 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n";
49 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n";
50 helpString += "If you want .abund and .rare files for all groups, set groups=all. \n";
51 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
52 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
53 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
57 m->errorOut(e, "SplitAbundCommand", "getHelpString");
62 //**********************************************************************************************************************
63 string SplitAbundCommand::getOutputPattern(string type) {
67 if (type == "fasta") { pattern = "[filename],[tag],[tag2],fasta-[filename],[tag],[group],[tag2],fasta"; }
68 else if (type == "list") { pattern = "[filename],[tag],[tag2],list-[filename],[group],[tag],[tag2],list"; }
69 else if (type == "name") { pattern = "[filename],[tag],names-[filename],[group],[tag],names"; }
70 else if (type == "count") { pattern = "[filename],[tag],[tag2],count_table-[filename],[tag],count_table"; }
71 else if (type == "group") { pattern = "[filename],[tag],[tag2],groups-[filename],[tag],[group],[tag2],groups"; }
72 else if (type == "accnos") { pattern = "[filename],[tag],[tag2],accnos-[filename],[tag],[group],[tag2],accnos"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SplitAbundCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SplitAbundCommand::SplitAbundCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["list"] = tempOutNames;
89 outputTypes["name"] = tempOutNames;
90 outputTypes["count"] = tempOutNames;
91 outputTypes["accnos"] = tempOutNames;
92 outputTypes["group"] = tempOutNames;
93 outputTypes["fasta"] = tempOutNames;
96 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option) {
103 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
113 map<string, string> parameters = parser.getParameters();
115 ValidParameters validParameter;
116 map<string, string>::iterator it;
118 //check to make sure all parameters are valid for command
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["list"] = tempOutNames;
126 outputTypes["name"] = tempOutNames;
127 outputTypes["accnos"] = tempOutNames;
128 outputTypes["group"] = tempOutNames;
129 outputTypes["fasta"] = tempOutNames;
130 outputTypes["count"] = tempOutNames;
132 //if the user changes the input directory command factory will send this info to us in the output parameter
133 string inputDir = validParameter.validFile(parameters, "inputdir", false);
134 if (inputDir == "not found"){ inputDir = ""; }
137 it = parameters.find("list");
138 //user has given a template file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["list"] = inputDir + it->second; }
145 it = parameters.find("group");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["group"] = inputDir + it->second; }
153 it = parameters.find("fasta");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["fasta"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
169 it = parameters.find("count");
170 //user has given a template file
171 if(it != parameters.end()){
172 path = m->hasPath(it->second);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { parameters["count"] = inputDir + it->second; }
179 //if the user changes the output directory command factory will send this info to us in the output parameter
180 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
182 //check for required parameters
183 listfile = validParameter.validFile(parameters, "list", true);
184 if (listfile == "not open") { abort = true; }
185 else if (listfile == "not found") { listfile = ""; }
186 else{ inputFile = listfile; m->setListFile(listfile); }
188 namefile = validParameter.validFile(parameters, "name", true);
189 if (namefile == "not open") { abort = true; }
190 else if (namefile == "not found") { namefile = ""; }
191 else{ inputFile = namefile; m->setNameFile(namefile); }
193 fastafile = validParameter.validFile(parameters, "fasta", true);
194 if (fastafile == "not open") { abort = true; }
195 else if (fastafile == "not found") {
196 fastafile = m->getFastaFile();
197 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
198 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
199 }else { m->setFastaFile(fastafile); }
201 groupfile = validParameter.validFile(parameters, "group", true);
202 if (groupfile == "not open") { groupfile = ""; abort = true; }
203 else if (groupfile == "not found") { groupfile = ""; }
205 int error = groupMap.readMap(groupfile);
206 if (error == 1) { abort = true; }
207 m->setGroupFile(groupfile);
210 countfile = validParameter.validFile(parameters, "count", true);
211 if (countfile == "not open") { countfile = ""; abort = true; }
212 else if (countfile == "not found") { countfile = ""; }
214 m->setCountTableFile(countfile);
215 ct.readTable(countfile, true, false);
218 if ((namefile != "") && (countfile != "")) {
219 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
222 if ((groupfile != "") && (countfile != "")) {
223 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
226 groups = validParameter.validFile(parameters, "groups", false);
227 if (groups == "not found") { groups = ""; }
228 else { m->splitAtDash(groups, Groups); }
230 if (((groupfile == "") && (countfile == ""))&& (groups != "")) { m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
232 if (countfile != "") {
233 if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
236 //do you have all files needed
237 if ((listfile == "") && (namefile == "") && (countfile == "")) {
238 namefile = m->getNameFile();
239 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
241 listfile = m->getListFile();
242 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
244 countfile = m->getCountTableFile();
245 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
246 else { m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
251 //check for optional parameter and set defaults
252 // ...at some point should added some additional type checking...
253 label = validParameter.validFile(parameters, "label", false);
254 if (label == "not found") { label = ""; allLines = 1; }
256 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
257 else { allLines = 1; }
260 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
261 accnos = m->isTrue(temp);
263 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
264 m->mothurConvert(temp, cutoff);
266 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
270 catch(exception& e) {
271 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
275 //**********************************************************************************************************************
276 SplitAbundCommand::~SplitAbundCommand(){}
277 //**********************************************************************************************************************
278 int SplitAbundCommand::execute(){
281 if (abort == true) { if (calledHelp) { return 0; } return 2; }
283 if (Groups.size() != 0) {
284 vector<string> allGroups;
285 if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
286 else { allGroups = groupMap.getNamesOfGroups(); }
288 util.setGroups(Groups, allGroups);
291 if (listfile != "") { //you are using a listfile to determine abundance
292 if (outputDir == "") { outputDir = m->hasPath(listfile); }
294 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295 set<string> processedLabels;
296 set<string> userLabels = labels;
298 InputData input(listfile, "list");
299 ListVector* list = input.getListVector();
300 string lastLabel = list->getLabel();
302 //do you have a namefile or do we need to similate one?
303 if (namefile != "") { readNamesFile(); }
304 else { createNameMap(list); }
306 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
308 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
310 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
312 if(allLines == 1 || labels.count(list->getLabel()) == 1){
314 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
317 processedLabels.insert(list->getLabel());
318 userLabels.erase(list->getLabel());
321 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
322 string saveLabel = list->getLabel();
325 list = input.getListVector(lastLabel); //get new list vector to process
327 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
330 processedLabels.insert(list->getLabel());
331 userLabels.erase(list->getLabel());
333 //restore real lastlabel to save below
334 list->setLabel(saveLabel);
338 lastLabel = list->getLabel();
341 list = input.getListVector(); //get new list vector to process
344 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
346 //output error messages about any remaining user labels
347 set<string>::iterator it;
348 bool needToRun = false;
349 for (it = userLabels.begin(); it != userLabels.end(); it++) {
350 m->mothurOut("Your file does not include the label " + *it);
351 if (processedLabels.count(lastLabel) != 1) {
352 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
355 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
360 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
362 //run last label if you need to
363 if (needToRun == true) {
364 if (list != NULL) { delete list; }
365 list = input.getListVector(lastLabel); //get new list vector to process
367 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
373 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
375 }else if (namefile != "") { //you are using the namefile to determine abundance
376 if (outputDir == "") { outputDir = m->hasPath(namefile); }
382 if (groupfile != "") { parseGroup(tag); }
383 if (accnos) { writeAccnos(tag); }
384 if (fastafile != "") { parseFasta(tag); }
390 if (accnos) { writeAccnos(tag); }
391 if (fastafile != "") { parseFasta(tag); }
394 //set fasta file as new current fastafile
396 itTypes = outputTypes.find("fasta");
397 if (itTypes != outputTypes.end()) {
398 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
401 itTypes = outputTypes.find("name");
402 if (itTypes != outputTypes.end()) {
403 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
406 itTypes = outputTypes.find("group");
407 if (itTypes != outputTypes.end()) {
408 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
411 itTypes = outputTypes.find("list");
412 if (itTypes != outputTypes.end()) {
413 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
416 itTypes = outputTypes.find("accnos");
417 if (itTypes != outputTypes.end()) {
418 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
421 itTypes = outputTypes.find("count");
422 if (itTypes != outputTypes.end()) {
423 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
426 m->mothurOutEndLine();
427 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
428 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
429 m->mothurOutEndLine();
433 catch(exception& e) {
434 m->errorOut(e, "SplitAbundCommand", "execute");
438 /**********************************************************************************************************************/
439 int SplitAbundCommand::splitList(ListVector* thisList) {
444 //get rareNames and abundNames
446 for (int i = 0; i < thisList->getNumBins(); i++) {
447 if (m->control_pressed) { return 0; }
449 string bin = thisList->get(i);
451 vector<string> names;
452 m->splitAtComma(bin, names); //parses bin into individual sequence names
453 int size = names.size();
455 //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
456 if (countfile != "") {
458 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
461 if (size <= cutoff) {
463 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
465 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
470 string tag = thisList->getLabel();
472 writeList(thisList, tag, numRareBins);
474 if (groupfile != "") { parseGroup(tag); }
475 if (accnos) { writeAccnos(tag); }
476 if (fastafile != "") { parseFasta(tag); }
477 if (countfile != "") { parseCount(tag); }
482 catch(exception& e) {
483 m->errorOut(e, "SplitAbundCommand", "splitList");
487 /**********************************************************************************************************************/
488 int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) {
491 map<string, ofstream*> filehandles;
493 if (Groups.size() == 0) {
494 int numAbundBins = thisList->getNumBins() - numRareBins;
499 map<string, string> variables;
500 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
501 variables["[tag]"] = tag;
502 variables["[tag2]"] = "rare";
503 string rare = getOutputFileName("list",variables);
504 m->openOutputFile(rare+".temp", rout);
505 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
507 variables["[tag2]"] = "abund";
508 string abund = getOutputFileName("list",variables);
509 m->openOutputFile(abund+".temp", aout);
510 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
512 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
513 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
515 vector<string> binLabels = thisList->getLabels();
516 string rareHeader = "label\tnumOtus\t"; string abundHeader = "label\tnumOtus\t";
517 for (int i = 0; i < thisList->getNumBins(); i++) {
518 if (m->control_pressed) { break; }
520 string bin = thisList->get(i);
521 vector<string> names;
522 m->splitAtComma(bin, names);
524 int size = names.size();
525 if (countfile != "") {
527 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
530 if (size <= cutoff) { rout << bin << '\t'; rareHeader += binLabels[i] + '\t'; }
531 else { aout << bin << '\t'; abundHeader += binLabels[i] + '\t'; }
534 if (rareNames.size() != 0) { rout << endl; }
535 if (abundNames.size() != 0) { aout << endl; }
542 m->openOutputFile(rare, r);
543 r << rareHeader << endl;
545 m->appendFiles(rare+".temp", rare);
546 m->mothurRemove(rare+".temp");
549 m->openOutputFile(abund, a);
550 a << abundHeader << endl;
552 m->appendFiles(abund+".temp", abund);
553 m->mothurRemove(abund+".temp");
555 }else{ //parse names by abundance and group
556 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
559 //map<string, bool> wroteFile;
560 map<string, ofstream*> filehandles;
561 map<string, ofstream*>::iterator it3;
563 for (int i=0; i<Groups.size(); i++) {
565 filehandles[Groups[i]+".rare"] = temp;
566 temp2 = new ofstream;
567 filehandles[Groups[i]+".abund"] = temp2;
569 map<string, string> variables;
570 variables["[filename]"] = fileroot;
571 variables["[tag]"] = tag;
572 variables["[tag2]"] = "rare";
573 variables["[group]"] = Groups[i];
574 string rareGroupFileName = getOutputFileName("list",variables);
575 variables["[tag2]"] = "abund";
576 string abundGroupFileName = getOutputFileName("list",variables);
577 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
578 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
579 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
580 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
583 map<string, string> groupVector;
584 map<string, string> groupLabels;
585 map<string, string>::iterator itGroup;
586 map<string, int> groupNumBins;
588 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
589 groupNumBins[it3->first] = 0;
590 groupVector[it3->first] = "";
591 groupLabels[it3->first] = "label\tnumOtus\t";
593 vector<string> binLabels = thisList->getLabels();
594 for (int i = 0; i < thisList->getNumBins(); i++) {
595 if (m->control_pressed) { break; }
597 map<string, string> groupBins;
598 string bin = thisList->get(i);
600 vector<string> names;
601 m->splitAtComma(bin, names); //parses bin into individual sequence names
603 //parse bin into list of sequences in each group
604 for (int j = 0; j < names.size(); j++) {
606 if (rareNames.count(names[j]) != 0) { //you are a rare name
608 }else{ //you are a abund name
609 rareAbund = ".abund";
612 if (countfile == "") {
613 string group = groupMap.getGroup(names[j]);
615 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
616 itGroup = groupBins.find(group+rareAbund);
617 if(itGroup == groupBins.end()) {
618 groupBins[group+rareAbund] = names[j]; //add first name
619 groupNumBins[group+rareAbund]++;
620 }else{ //add another name
621 groupBins[group+rareAbund] += "," + names[j];
623 }else if(group == "not found") {
624 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
627 vector<string> thisSeqsGroups = ct.getGroups(names[j]);
628 for (int k = 0; k < thisSeqsGroups.size(); k++) {
629 if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
630 itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
631 if(itGroup == groupBins.end()) {
632 groupBins[thisSeqsGroups[k]+rareAbund] = names[j]; //add first name
633 groupNumBins[thisSeqsGroups[k]+rareAbund]++;
634 }else{ //add another name
635 groupBins[thisSeqsGroups[k]+rareAbund] += "," + names[j];
643 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
644 groupVector[itGroup->first] += itGroup->second + '\t';
645 groupLabels[itGroup->first] += binLabels[i] + '\t';
650 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
651 (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
652 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
653 (*(filehandles[it3->first])).close();
661 catch(exception& e) {
662 m->errorOut(e, "SplitAbundCommand", "writeList");
666 /**********************************************************************************************************************/
667 int SplitAbundCommand::splitCount() { //countfile
672 vector<string> allNames = ct.getNamesOfSeqs();
673 for (int i = 0; i < allNames.size(); i++) {
675 if (m->control_pressed) { return 0; }
677 int size = ct.getNumSeqs(allNames[i]);
678 nameMap[allNames[i]] = allNames[i];
680 if (size <= cutoff) {
681 rareNames.insert(allNames[i]);
683 abundNames.insert(allNames[i]);
687 //write out split count files
692 catch(exception& e) {
693 m->errorOut(e, "SplitAbundCommand", "splitCount");
697 /**********************************************************************************************************************/
698 int SplitAbundCommand::splitNames() { //namefile
706 m->openInputFile(namefile, in);
709 if (m->control_pressed) { break; }
711 string firstCol, secondCol;
712 in >> firstCol >> secondCol; m->gobble(in);
714 nameMap[firstCol] = secondCol;
716 int size = m->getNumNames(secondCol);
718 if (size <= cutoff) {
719 rareNames.insert(firstCol);
721 abundNames.insert(firstCol);
729 catch(exception& e) {
730 m->errorOut(e, "SplitAbundCommand", "splitNames");
734 /**********************************************************************************************************************/
735 int SplitAbundCommand::readNamesFile() {
739 m->openInputFile(namefile, in);
742 if (m->control_pressed) { break; }
744 string firstCol, secondCol;
745 in >> firstCol >> secondCol; m->gobble(in);
747 nameMap[firstCol] = secondCol;
754 catch(exception& e) {
755 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
759 /**********************************************************************************************************************/
760 int SplitAbundCommand::createNameMap(ListVector* thisList) {
763 if (thisList != NULL) {
764 for (int i = 0; i < thisList->getNumBins(); i++) {
765 if (m->control_pressed) { return 0; }
767 string bin = thisList->get(i);
769 vector<string> names;
770 m->splitAtComma(bin, names); //parses bin into individual sequence names
772 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
778 catch(exception& e) {
779 m->errorOut(e, "SplitAbundCommand", "createNameMap");
783 /**********************************************************************************************************************/
784 int SplitAbundCommand::parseCount(string tag) { //namefile
787 map<string, ofstream*> filehandles;
789 if (Groups.size() == 0) {
790 map<string, string> variables;
791 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
792 variables["[tag]"] = tag;
793 variables["[tag2]"] = "rare";
794 string rare = getOutputFileName("count",variables);
795 outputNames.push_back(rare); outputTypes["count"].push_back(rare);
796 variables["[tag2]"] = "abund";
797 string abund = getOutputFileName("count",variables);
798 outputNames.push_back(abund); outputTypes["count"].push_back(abund);
800 CountTable rareTable;
801 CountTable abundTable;
802 if (ct.hasGroupInfo()) {
803 vector<string> ctGroups = ct.getNamesOfGroups();
804 for (int i = 0; i < ctGroups.size(); i++) { rareTable.addGroup(ctGroups[i]); abundTable.addGroup(ctGroups[i]); }
807 if (rareNames.size() != 0) {
808 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
809 if (ct.hasGroupInfo()) {
810 vector<int> groupCounts = ct.getGroupCounts(*itRare);
811 rareTable.push_back(*itRare, groupCounts);
813 int groupCounts = ct.getNumSeqs(*itRare);
814 rareTable.push_back(*itRare, groupCounts);
817 if (rareTable.hasGroupInfo()) {
818 vector<string> ctGroups = rareTable.getNamesOfGroups();
819 for (int i = 0; i < ctGroups.size(); i++) {
820 if (rareTable.getGroupCount(ctGroups[i]) == 0) { rareTable.removeGroup(ctGroups[i]); }
823 rareTable.printTable(rare);
827 if (abundNames.size() != 0) {
828 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
829 if (ct.hasGroupInfo()) {
830 vector<int> groupCounts = ct.getGroupCounts(*itAbund);
831 abundTable.push_back(*itAbund, groupCounts);
833 int groupCounts = ct.getNumSeqs(*itAbund);
834 abundTable.push_back(*itAbund, groupCounts);
837 if (abundTable.hasGroupInfo()) {
838 vector<string> ctGroups = abundTable.getNamesOfGroups();
839 for (int i = 0; i < ctGroups.size(); i++) {
840 if (abundTable.getGroupCount(ctGroups[i]) == 0) { abundTable.removeGroup(ctGroups[i]); }
843 abundTable.printTable(abund);
846 }else{ //parse names by abundance and group
847 map<string, CountTable*> countTableMap;
848 map<string, CountTable*>::iterator it3;
850 for (int i=0; i<Groups.size(); i++) {
851 CountTable* rareCt = new CountTable();
852 rareCt->addGroup(Groups[i]);
853 countTableMap[Groups[i]+".rare"] = rareCt;
854 CountTable* abundCt = new CountTable();
855 abundCt->addGroup(Groups[i]);
856 countTableMap[Groups[i]+".abund"] = abundCt;
859 vector<string> allNames = ct.getNamesOfSeqs();
860 for (int i = 0; i < allNames.size(); i++) {
862 if (rareNames.count(allNames[i]) != 0) { //you are a rare name
864 }else{ //you are a abund name
865 rareAbund = ".abund";
868 vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
869 for (int j = 0; j < thisSeqsGroups.size(); j++) {
870 if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
871 int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
872 vector<int> nums; nums.push_back(num);
873 countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums);
879 for (it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) {
880 string fileroot = outputDir + m->getRootName(m->getSimpleName(countfile));
881 map<string, string> variables;
882 variables["[filename]"] = fileroot;
883 variables["[tag]"] = it3->first;
884 string filename = getOutputFileName("count",variables);
885 outputNames.push_back(filename); outputTypes["count"].push_back(filename);
886 (it3->second)->printTable(filename);
894 catch(exception& e) {
895 m->errorOut(e, "SplitAbundCommand", "parseCount");
899 /**********************************************************************************************************************/
900 int SplitAbundCommand::writeNames() { //namefile
903 map<string, ofstream*> filehandles;
905 if (Groups.size() == 0) {
909 map<string, string> variables;
910 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
911 variables["[tag]"] = "rare";
912 string rare = getOutputFileName("name", variables);
913 m->openOutputFile(rare, rout);
914 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
916 variables["[tag]"] = "abund";
917 string abund = getOutputFileName("name", variables);
918 m->openOutputFile(abund, aout);
919 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
921 if (rareNames.size() != 0) {
922 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
923 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
928 if (abundNames.size() != 0) {
929 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
930 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
935 }else{ //parse names by abundance and group
936 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
939 map<string, ofstream*> filehandles;
940 map<string, ofstream*>::iterator it3;
942 for (int i=0; i<Groups.size(); i++) {
944 filehandles[Groups[i]+".rare"] = temp;
945 temp2 = new ofstream;
946 filehandles[Groups[i]+".abund"] = temp2;
948 map<string, string> variables;
949 variables["[filename]"] = fileroot;
950 variables["[tag]"] = "rare";
951 variables["[group]"] = Groups[i];
952 string rareGroupFileName = getOutputFileName("name",variables);
953 variables["[tag]"] = "abund";
954 string abundGroupFileName = getOutputFileName("name",variables);
955 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
956 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
959 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
960 vector<string> names;
961 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
964 if (rareNames.count(itName->first) != 0) { //you are a rare name
966 }else{ //you are a abund name
967 rareAbund = ".abund";
970 map<string, string> outputStrings;
971 map<string, string>::iterator itout;
972 for (int i = 0; i < names.size(); i++) {
974 string group = groupMap.getGroup(names[i]);
976 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
977 itout = outputStrings.find(group+rareAbund);
978 if (itout == outputStrings.end()) {
979 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
980 }else { outputStrings[group+rareAbund] += "," + names[i]; }
981 }else if(group == "not found") {
982 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
986 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
990 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
991 (*(filehandles[it3->first])).close();
992 map<string, string> variables;
993 variables["[filename]"] = fileroot;
994 variables["[tag]"] = it3->first;
995 outputNames.push_back(getOutputFileName("name",variables)); outputTypes["name"].push_back(getOutputFileName("name",variables));
1003 catch(exception& e) {
1004 m->errorOut(e, "SplitAbundCommand", "writeNames");
1008 /**********************************************************************************************************************/
1009 //just write the unique names - if a namesfile is given
1010 int SplitAbundCommand::writeAccnos(string tag) {
1013 map<string, ofstream*> filehandles;
1015 if (Groups.size() == 0) {
1019 map<string, string> variables;
1020 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
1021 variables["[tag]"] = tag;
1022 variables["[tag2]"] = "rare";
1023 string rare = getOutputFileName("accnos",variables);
1024 m->openOutputFile(rare, rout);
1025 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
1027 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1028 rout << (*itRare) << endl;
1032 variables["[tag2]"] = "abund";
1033 string abund = getOutputFileName("accnos",variables);
1034 m->openOutputFile(abund, aout);
1035 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
1037 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1038 aout << (*itAbund) << endl;
1042 }else{ //parse names by abundance and group
1043 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
1046 map<string, ofstream*> filehandles;
1047 map<string, ofstream*>::iterator it3;
1049 for (int i=0; i<Groups.size(); i++) {
1050 temp = new ofstream;
1051 filehandles[Groups[i]+".rare"] = temp;
1052 temp2 = new ofstream;
1053 filehandles[Groups[i]+".abund"] = temp2;
1055 map<string, string> variables;
1056 variables["[filename]"] = fileroot;
1057 variables["[tag]"] = tag;
1058 variables["[tag2]"] = "rare";
1059 variables["[group]"] = Groups[i];
1060 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".rare"]));
1061 variables["[tag2]"] = "abund";
1062 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".abund"]));
1066 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1067 string group = groupMap.getGroup(*itRare);
1069 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1070 *(filehandles[group+".rare"]) << *itRare << endl;
1075 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1076 string group = groupMap.getGroup(*itAbund);
1078 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1079 *(filehandles[group+".abund"]) << *itAbund << endl;
1084 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1085 (*(filehandles[it3->first])).close();
1086 map<string, string> variables;
1087 variables["[filename]"] = fileroot;
1088 variables["[tag]"] = tag;
1089 variables["[tag2]"] = it3->first;
1090 outputNames.push_back(getOutputFileName("accnos",variables)); outputTypes["accnos"].push_back(getOutputFileName("accnos",variables));
1098 catch(exception& e) {
1099 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
1103 /**********************************************************************************************************************/
1104 int SplitAbundCommand::parseGroup(string tag) { //namefile
1107 map<string, ofstream*> filehandles;
1109 if (Groups.size() == 0) {
1113 map<string, string> variables;
1114 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
1115 variables["[tag]"] = tag;
1116 variables["[tag2]"] = "rare";
1117 string rare = getOutputFileName("group",variables);
1118 m->openOutputFile(rare, rout);
1119 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
1121 variables["[tag2]"] = "abund";
1122 string abund = getOutputFileName("group",variables);
1124 m->openOutputFile(abund, aout);
1125 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
1127 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1128 vector<string> names;
1129 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
1131 for (int i = 0; i < names.size(); i++) {
1133 string group = groupMap.getGroup(names[i]);
1135 if (group == "not found") {
1136 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
1138 if (rareNames.count(itName->first) != 0) { //you are a rare name
1139 rout << names[i] << '\t' << group << endl;
1140 }else{ //you are a abund name
1141 aout << names[i] << '\t' << group << endl;
1150 }else{ //parse names by abundance and group
1151 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
1154 map<string, ofstream*> filehandles;
1155 map<string, ofstream*>::iterator it3;
1157 for (int i=0; i<Groups.size(); i++) {
1158 temp = new ofstream;
1159 filehandles[Groups[i]+".rare"] = temp;
1160 temp2 = new ofstream;
1161 filehandles[Groups[i]+".abund"] = temp2;
1163 map<string, string> variables;
1164 variables["[filename]"] = fileroot;
1165 variables["[tag]"] = tag;
1166 variables["[tag2]"] = "rare";
1167 variables["[group]"] = Groups[i];
1168 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".rare"]));
1169 variables["[tag2]"] = "abund";
1170 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".abund"]));
1173 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1174 vector<string> names;
1175 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
1178 if (rareNames.count(itName->first) != 0) { //you are a rare name
1179 rareAbund = ".rare";
1180 }else{ //you are a abund name
1181 rareAbund = ".abund";
1184 for (int i = 0; i < names.size(); i++) {
1186 string group = groupMap.getGroup(names[i]);
1188 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1189 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
1194 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1195 (*(filehandles[it3->first])).close();
1196 map<string, string> variables;
1197 variables["[filename]"] = fileroot;
1198 variables["[tag]"] = tag;
1199 variables["[tag2]"] = it3->first;
1200 outputNames.push_back(getOutputFileName("group",variables)); outputTypes["group"].push_back(getOutputFileName("group",variables));
1208 catch(exception& e) {
1209 m->errorOut(e, "SplitAbundCommand", "parseGroups");
1213 /**********************************************************************************************************************/
1214 int SplitAbundCommand::parseFasta(string tag) { //namefile
1217 map<string, ofstream*> filehandles;
1219 if (Groups.size() == 0) {
1223 map<string, string> variables;
1224 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
1225 variables["[tag]"] = tag;
1226 variables["[tag2]"] = "rare";
1227 string rare = getOutputFileName("fasta",variables);
1228 m->openOutputFile(rare, rout);
1229 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
1231 variables["[tag2]"] = "abund";
1232 string abund = getOutputFileName("fasta",variables);
1233 m->openOutputFile(abund, aout);
1234 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
1238 m->openInputFile(fastafile, in);
1241 if (m->control_pressed) { break; }
1243 Sequence seq(in); m->gobble(in);
1245 if (seq.getName() != "") {
1247 map<string, string>::iterator itNames;
1249 itNames = nameMap.find(seq.getName());
1251 if (itNames == nameMap.end()) {
1252 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1254 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
1255 seq.printSequence(rout);
1256 }else{ //you are a abund name
1257 seq.printSequence(aout);
1266 }else{ //parse names by abundance and group
1267 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
1270 map<string, ofstream*> filehandles;
1271 map<string, ofstream*>::iterator it3;
1273 for (int i=0; i<Groups.size(); i++) {
1274 temp = new ofstream;
1275 filehandles[Groups[i]+".rare"] = temp;
1276 temp2 = new ofstream;
1277 filehandles[Groups[i]+".abund"] = temp2;
1279 map<string, string> variables;
1280 variables["[filename]"] = fileroot;
1281 variables["[tag]"] = tag;
1282 variables["[tag2]"] = "rare";
1283 variables["[group]"] = Groups[i];
1284 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".rare"]));
1285 variables["[tag2]"] = "abund";
1286 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".abund"]));
1291 m->openInputFile(fastafile, in);
1294 if (m->control_pressed) { break; }
1296 Sequence seq(in); m->gobble(in);
1298 if (seq.getName() != "") {
1299 map<string, string>::iterator itNames = nameMap.find(seq.getName());
1301 if (itNames == nameMap.end()) {
1302 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1304 vector<string> names;
1305 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
1308 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1309 rareAbund = ".rare";
1310 }else{ //you are a abund name
1311 rareAbund = ".abund";
1314 if (countfile == "") {
1315 for (int i = 0; i < names.size(); i++) {
1316 string group = groupMap.getGroup(seq.getName());
1318 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1319 seq.printSequence(*(filehandles[group+rareAbund]));
1320 }else if(group == "not found") {
1321 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1325 vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
1326 for (int i = 0; i < thisSeqsGroups.size(); i++) {
1327 if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
1328 seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
1337 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1338 (*(filehandles[it3->first])).close();
1339 map<string, string> variables;
1340 variables["[filename]"] = fileroot;
1341 variables["[tag]"] = tag;
1342 variables["[tag2]"] = it3->first;
1343 outputNames.push_back(getOutputFileName("fasta",variables)); outputTypes["fasta"].push_back(getOutputFileName("fasta",variables));
1351 catch(exception& e) {
1352 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1356 /**********************************************************************************************************************/