2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
11 #include "sharedutilities.h"
13 //**********************************************************************************************************************
14 vector<string> SplitAbundCommand::setParameters(){
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
19 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
20 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
21 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(paccnos);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "SplitAbundCommand", "setParameters");
37 //**********************************************************************************************************************
38 string SplitAbundCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
42 helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
43 helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
44 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
45 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
46 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
47 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
48 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n";
49 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n";
50 helpString += "If you want .abund and .rare files for all groups, set groups=all. \n";
51 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
52 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
53 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
57 m->errorOut(e, "SplitAbundCommand", "getHelpString");
62 //**********************************************************************************************************************
63 string SplitAbundCommand::getOutputPattern(string type) {
67 if (type == "fasta") { pattern = "[filename],[tag],[tag2],fasta-[filename],[tag],[group],[tag2],fasta"; }
68 else if (type == "list") { pattern = "[filename],[tag],[tag2],list-[filename],[group],[tag],[tag2],list"; }
69 else if (type == "name") { pattern = "[filename],[tag],names-[filename],[group],[tag],names"; }
70 else if (type == "count") { pattern = "[filename],[tag],[tag2],count_table-[filename],[tag],count_table"; }
71 else if (type == "group") { pattern = "[filename],[tag],[tag2],groups-[filename],[tag],[group],[tag2],groups"; }
72 else if (type == "accnos") { pattern = "[filename],[tag],[tag2],accnos-[filename],[tag],[group],[tag2],accnos"; }
73 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
78 m->errorOut(e, "SplitAbundCommand", "getOutputPattern");
82 //**********************************************************************************************************************
83 SplitAbundCommand::SplitAbundCommand(){
85 abort = true; calledHelp = true;
87 vector<string> tempOutNames;
88 outputTypes["list"] = tempOutNames;
89 outputTypes["name"] = tempOutNames;
90 outputTypes["count"] = tempOutNames;
91 outputTypes["accnos"] = tempOutNames;
92 outputTypes["group"] = tempOutNames;
93 outputTypes["fasta"] = tempOutNames;
96 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option) {
103 abort = false; calledHelp = false;
106 //allow user to run help
107 if(option == "help") { help(); abort = true; calledHelp = true; }
108 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
110 vector<string> myArray = setParameters();
112 OptionParser parser(option);
113 map<string, string> parameters = parser.getParameters();
115 ValidParameters validParameter;
116 map<string, string>::iterator it;
118 //check to make sure all parameters are valid for command
119 for (it = parameters.begin(); it != parameters.end(); it++) {
120 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
123 //initialize outputTypes
124 vector<string> tempOutNames;
125 outputTypes["list"] = tempOutNames;
126 outputTypes["name"] = tempOutNames;
127 outputTypes["accnos"] = tempOutNames;
128 outputTypes["group"] = tempOutNames;
129 outputTypes["fasta"] = tempOutNames;
130 outputTypes["count"] = tempOutNames;
132 //if the user changes the input directory command factory will send this info to us in the output parameter
133 string inputDir = validParameter.validFile(parameters, "inputdir", false);
134 if (inputDir == "not found"){ inputDir = ""; }
137 it = parameters.find("list");
138 //user has given a template file
139 if(it != parameters.end()){
140 path = m->hasPath(it->second);
141 //if the user has not given a path then, add inputdir. else leave path alone.
142 if (path == "") { parameters["list"] = inputDir + it->second; }
145 it = parameters.find("group");
146 //user has given a template file
147 if(it != parameters.end()){
148 path = m->hasPath(it->second);
149 //if the user has not given a path then, add inputdir. else leave path alone.
150 if (path == "") { parameters["group"] = inputDir + it->second; }
153 it = parameters.find("fasta");
154 //user has given a template file
155 if(it != parameters.end()){
156 path = m->hasPath(it->second);
157 //if the user has not given a path then, add inputdir. else leave path alone.
158 if (path == "") { parameters["fasta"] = inputDir + it->second; }
161 it = parameters.find("name");
162 //user has given a template file
163 if(it != parameters.end()){
164 path = m->hasPath(it->second);
165 //if the user has not given a path then, add inputdir. else leave path alone.
166 if (path == "") { parameters["name"] = inputDir + it->second; }
169 it = parameters.find("count");
170 //user has given a template file
171 if(it != parameters.end()){
172 path = m->hasPath(it->second);
173 //if the user has not given a path then, add inputdir. else leave path alone.
174 if (path == "") { parameters["count"] = inputDir + it->second; }
179 //if the user changes the output directory command factory will send this info to us in the output parameter
180 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
182 //check for required parameters
183 listfile = validParameter.validFile(parameters, "list", true);
184 if (listfile == "not open") { abort = true; }
185 else if (listfile == "not found") { listfile = ""; }
186 else{ inputFile = listfile; m->setListFile(listfile); }
188 namefile = validParameter.validFile(parameters, "name", true);
189 if (namefile == "not open") { abort = true; }
190 else if (namefile == "not found") { namefile = ""; }
191 else{ inputFile = namefile; m->setNameFile(namefile); }
193 fastafile = validParameter.validFile(parameters, "fasta", true);
194 if (fastafile == "not open") { abort = true; }
195 else if (fastafile == "not found") {
196 fastafile = m->getFastaFile();
197 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
198 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
199 }else { m->setFastaFile(fastafile); }
201 groupfile = validParameter.validFile(parameters, "group", true);
202 if (groupfile == "not open") { groupfile = ""; abort = true; }
203 else if (groupfile == "not found") { groupfile = ""; }
205 int error = groupMap.readMap(groupfile);
206 if (error == 1) { abort = true; }
207 m->setGroupFile(groupfile);
210 countfile = validParameter.validFile(parameters, "count", true);
211 if (countfile == "not open") { countfile = ""; abort = true; }
212 else if (countfile == "not found") { countfile = ""; }
214 m->setCountTableFile(countfile);
215 ct.readTable(countfile, true);
218 if ((namefile != "") && (countfile != "")) {
219 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
222 if ((groupfile != "") && (countfile != "")) {
223 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
226 groups = validParameter.validFile(parameters, "groups", false);
227 if (groups == "not found") { groups = ""; }
228 else { m->splitAtDash(groups, Groups); }
230 if (((groupfile == "") && (countfile == ""))&& (groups != "")) { m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
232 if (countfile != "") {
233 if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
236 //do you have all files needed
237 if ((listfile == "") && (namefile == "") && (countfile == "")) {
238 namefile = m->getNameFile();
239 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
241 listfile = m->getListFile();
242 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
244 countfile = m->getCountTableFile();
245 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
246 else { m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
251 //check for optional parameter and set defaults
252 // ...at some point should added some additional type checking...
253 label = validParameter.validFile(parameters, "label", false);
254 if (label == "not found") { label = ""; allLines = 1; }
256 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
257 else { allLines = 1; }
260 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
261 accnos = m->isTrue(temp);
263 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
264 m->mothurConvert(temp, cutoff);
266 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
270 catch(exception& e) {
271 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
275 //**********************************************************************************************************************
276 SplitAbundCommand::~SplitAbundCommand(){}
277 //**********************************************************************************************************************
278 int SplitAbundCommand::execute(){
281 if (abort == true) { if (calledHelp) { return 0; } return 2; }
283 if (Groups.size() != 0) {
284 vector<string> allGroups;
285 if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
286 else { allGroups = groupMap.getNamesOfGroups(); }
288 util.setGroups(Groups, allGroups);
291 if (listfile != "") { //you are using a listfile to determine abundance
292 if (outputDir == "") { outputDir = m->hasPath(listfile); }
294 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295 set<string> processedLabels;
296 set<string> userLabels = labels;
298 InputData input(listfile, "list");
299 ListVector* list = input.getListVector();
300 string lastLabel = list->getLabel();
302 //do you have a namefile or do we need to similate one?
303 if (namefile != "") { readNamesFile(); }
304 else { createNameMap(list); }
306 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
308 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
310 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
312 if(allLines == 1 || labels.count(list->getLabel()) == 1){
314 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
317 processedLabels.insert(list->getLabel());
318 userLabels.erase(list->getLabel());
321 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
322 string saveLabel = list->getLabel();
325 list = input.getListVector(lastLabel); //get new list vector to process
327 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
330 processedLabels.insert(list->getLabel());
331 userLabels.erase(list->getLabel());
333 //restore real lastlabel to save below
334 list->setLabel(saveLabel);
338 lastLabel = list->getLabel();
341 list = input.getListVector(); //get new list vector to process
344 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
346 //output error messages about any remaining user labels
347 set<string>::iterator it;
348 bool needToRun = false;
349 for (it = userLabels.begin(); it != userLabels.end(); it++) {
350 m->mothurOut("Your file does not include the label " + *it);
351 if (processedLabels.count(lastLabel) != 1) {
352 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
355 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
360 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
362 //run last label if you need to
363 if (needToRun == true) {
364 if (list != NULL) { delete list; }
365 list = input.getListVector(lastLabel); //get new list vector to process
367 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
373 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
375 }else if (namefile != "") { //you are using the namefile to determine abundance
376 if (outputDir == "") { outputDir = m->hasPath(namefile); }
382 if (groupfile != "") { parseGroup(tag); }
383 if (accnos) { writeAccnos(tag); }
384 if (fastafile != "") { parseFasta(tag); }
390 if (accnos) { writeAccnos(tag); }
391 if (fastafile != "") { parseFasta(tag); }
394 //set fasta file as new current fastafile
396 itTypes = outputTypes.find("fasta");
397 if (itTypes != outputTypes.end()) {
398 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
401 itTypes = outputTypes.find("name");
402 if (itTypes != outputTypes.end()) {
403 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
406 itTypes = outputTypes.find("group");
407 if (itTypes != outputTypes.end()) {
408 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
411 itTypes = outputTypes.find("list");
412 if (itTypes != outputTypes.end()) {
413 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
416 itTypes = outputTypes.find("accnos");
417 if (itTypes != outputTypes.end()) {
418 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
421 itTypes = outputTypes.find("count");
422 if (itTypes != outputTypes.end()) {
423 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
426 m->mothurOutEndLine();
427 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
428 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
429 m->mothurOutEndLine();
433 catch(exception& e) {
434 m->errorOut(e, "SplitAbundCommand", "execute");
438 /**********************************************************************************************************************/
439 int SplitAbundCommand::splitList(ListVector* thisList) {
444 //get rareNames and abundNames
446 for (int i = 0; i < thisList->getNumBins(); i++) {
447 if (m->control_pressed) { return 0; }
449 string bin = thisList->get(i);
451 vector<string> names;
452 m->splitAtComma(bin, names); //parses bin into individual sequence names
453 int size = names.size();
455 //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
456 if (countfile != "") {
458 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
461 if (size <= cutoff) {
463 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
465 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
470 string tag = thisList->getLabel();
472 writeList(thisList, tag, numRareBins);
474 if (groupfile != "") { parseGroup(tag); }
475 if (accnos) { writeAccnos(tag); }
476 if (fastafile != "") { parseFasta(tag); }
477 if (countfile != "") { parseCount(tag); }
482 catch(exception& e) {
483 m->errorOut(e, "SplitAbundCommand", "splitList");
487 /**********************************************************************************************************************/
488 int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) {
491 map<string, ofstream*> filehandles;
493 if (Groups.size() == 0) {
494 int numAbundBins = thisList->getNumBins() - numRareBins;
499 map<string, string> variables;
500 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
501 variables["[tag]"] = tag;
502 variables["[tag2]"] = "rare";
503 string rare = getOutputFileName("list",variables);
504 m->openOutputFile(rare, rout);
505 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
507 variables["[tag2]"] = "abund";
508 string abund = getOutputFileName("list",variables);
509 m->openOutputFile(abund, aout);
510 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
512 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
513 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
515 for (int i = 0; i < thisList->getNumBins(); i++) {
516 if (m->control_pressed) { break; }
518 string bin = thisList->get(i);
519 vector<string> names;
520 m->splitAtComma(bin, names);
522 int size = names.size();
523 if (countfile != "") {
525 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
528 if (size <= cutoff) { rout << bin << '\t'; }
529 else { aout << bin << '\t'; }
532 if (rareNames.size() != 0) { rout << endl; }
533 if (abundNames.size() != 0) { aout << endl; }
538 }else{ //parse names by abundance and group
539 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
542 //map<string, bool> wroteFile;
543 map<string, ofstream*> filehandles;
544 map<string, ofstream*>::iterator it3;
546 for (int i=0; i<Groups.size(); i++) {
548 filehandles[Groups[i]+".rare"] = temp;
549 temp2 = new ofstream;
550 filehandles[Groups[i]+".abund"] = temp2;
552 map<string, string> variables;
553 variables["[filename]"] = fileroot;
554 variables["[tag]"] = tag;
555 variables["[tag2]"] = "rare";
556 variables["[group]"] = Groups[i];
557 string rareGroupFileName = getOutputFileName("list",variables);
558 variables["[tag2]"] = "abund";
559 string abundGroupFileName = getOutputFileName("list",variables);
560 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
561 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
562 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
563 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
566 map<string, string> groupVector;
567 map<string, string>::iterator itGroup;
568 map<string, int> groupNumBins;
570 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
571 groupNumBins[it3->first] = 0;
572 groupVector[it3->first] = "";
575 for (int i = 0; i < thisList->getNumBins(); i++) {
576 if (m->control_pressed) { break; }
578 map<string, string> groupBins;
579 string bin = thisList->get(i);
581 vector<string> names;
582 m->splitAtComma(bin, names); //parses bin into individual sequence names
584 //parse bin into list of sequences in each group
585 for (int j = 0; j < names.size(); j++) {
587 if (rareNames.count(names[j]) != 0) { //you are a rare name
589 }else{ //you are a abund name
590 rareAbund = ".abund";
593 if (countfile == "") {
594 string group = groupMap.getGroup(names[j]);
596 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
597 itGroup = groupBins.find(group+rareAbund);
598 if(itGroup == groupBins.end()) {
599 groupBins[group+rareAbund] = names[j]; //add first name
600 groupNumBins[group+rareAbund]++;
601 }else{ //add another name
602 groupBins[group+rareAbund] += "," + names[j];
604 }else if(group == "not found") {
605 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
608 vector<string> thisSeqsGroups = ct.getGroups(names[j]);
609 for (int k = 0; k < thisSeqsGroups.size(); k++) {
610 if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
611 itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
612 if(itGroup == groupBins.end()) {
613 groupBins[thisSeqsGroups[k]+rareAbund] = names[j]; //add first name
614 groupNumBins[thisSeqsGroups[k]+rareAbund]++;
615 }else{ //add another name
616 groupBins[thisSeqsGroups[k]+rareAbund] += "," + names[j];
624 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
625 groupVector[itGroup->first] += itGroup->second + '\t';
630 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
631 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
632 (*(filehandles[it3->first])).close();
640 catch(exception& e) {
641 m->errorOut(e, "SplitAbundCommand", "writeList");
645 /**********************************************************************************************************************/
646 int SplitAbundCommand::splitCount() { //countfile
651 vector<string> allNames = ct.getNamesOfSeqs();
652 for (int i = 0; i < allNames.size(); i++) {
654 if (m->control_pressed) { return 0; }
656 int size = ct.getNumSeqs(allNames[i]);
657 nameMap[allNames[i]] = allNames[i];
659 if (size <= cutoff) {
660 rareNames.insert(allNames[i]);
662 abundNames.insert(allNames[i]);
666 //write out split count files
671 catch(exception& e) {
672 m->errorOut(e, "SplitAbundCommand", "splitCount");
676 /**********************************************************************************************************************/
677 int SplitAbundCommand::splitNames() { //namefile
685 m->openInputFile(namefile, in);
688 if (m->control_pressed) { break; }
690 string firstCol, secondCol;
691 in >> firstCol >> secondCol; m->gobble(in);
693 nameMap[firstCol] = secondCol;
695 int size = m->getNumNames(secondCol);
697 if (size <= cutoff) {
698 rareNames.insert(firstCol);
700 abundNames.insert(firstCol);
708 catch(exception& e) {
709 m->errorOut(e, "SplitAbundCommand", "splitNames");
713 /**********************************************************************************************************************/
714 int SplitAbundCommand::readNamesFile() {
718 m->openInputFile(namefile, in);
721 if (m->control_pressed) { break; }
723 string firstCol, secondCol;
724 in >> firstCol >> secondCol; m->gobble(in);
726 nameMap[firstCol] = secondCol;
733 catch(exception& e) {
734 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
738 /**********************************************************************************************************************/
739 int SplitAbundCommand::createNameMap(ListVector* thisList) {
742 if (thisList != NULL) {
743 for (int i = 0; i < thisList->getNumBins(); i++) {
744 if (m->control_pressed) { return 0; }
746 string bin = thisList->get(i);
748 vector<string> names;
749 m->splitAtComma(bin, names); //parses bin into individual sequence names
751 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
757 catch(exception& e) {
758 m->errorOut(e, "SplitAbundCommand", "createNameMap");
762 /**********************************************************************************************************************/
763 int SplitAbundCommand::parseCount(string tag) { //namefile
766 map<string, ofstream*> filehandles;
768 if (Groups.size() == 0) {
769 map<string, string> variables;
770 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
771 variables["[tag]"] = tag;
772 variables["[tag2]"] = "rare";
773 string rare = getOutputFileName("count",variables);
774 outputNames.push_back(rare); outputTypes["count"].push_back(rare);
775 variables["[tag2]"] = "abund";
776 string abund = getOutputFileName("count",variables);
777 outputNames.push_back(abund); outputTypes["count"].push_back(abund);
779 CountTable rareTable;
780 CountTable abundTable;
781 if (ct.hasGroupInfo()) {
782 vector<string> ctGroups = ct.getNamesOfGroups();
783 for (int i = 0; i < ctGroups.size(); i++) { rareTable.addGroup(ctGroups[i]); abundTable.addGroup(ctGroups[i]); }
786 if (rareNames.size() != 0) {
787 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
788 if (ct.hasGroupInfo()) {
789 vector<int> groupCounts = ct.getGroupCounts(*itRare);
790 rareTable.push_back(*itRare, groupCounts);
792 int groupCounts = ct.getNumSeqs(*itRare);
793 rareTable.push_back(*itRare, groupCounts);
796 if (rareTable.hasGroupInfo()) {
797 vector<string> ctGroups = rareTable.getNamesOfGroups();
798 for (int i = 0; i < ctGroups.size(); i++) {
799 if (rareTable.getGroupCount(ctGroups[i]) == 0) { rareTable.removeGroup(ctGroups[i]); }
802 rareTable.printTable(rare);
806 if (abundNames.size() != 0) {
807 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
808 if (ct.hasGroupInfo()) {
809 vector<int> groupCounts = ct.getGroupCounts(*itAbund);
810 abundTable.push_back(*itAbund, groupCounts);
812 int groupCounts = ct.getNumSeqs(*itAbund);
813 abundTable.push_back(*itAbund, groupCounts);
816 if (abundTable.hasGroupInfo()) {
817 vector<string> ctGroups = abundTable.getNamesOfGroups();
818 for (int i = 0; i < ctGroups.size(); i++) {
819 if (abundTable.getGroupCount(ctGroups[i]) == 0) { abundTable.removeGroup(ctGroups[i]); }
822 abundTable.printTable(abund);
825 }else{ //parse names by abundance and group
826 map<string, CountTable*> countTableMap;
827 map<string, CountTable*>::iterator it3;
829 for (int i=0; i<Groups.size(); i++) {
830 CountTable* rareCt = new CountTable();
831 rareCt->addGroup(Groups[i]);
832 countTableMap[Groups[i]+".rare"] = rareCt;
833 CountTable* abundCt = new CountTable();
834 abundCt->addGroup(Groups[i]);
835 countTableMap[Groups[i]+".abund"] = abundCt;
838 vector<string> allNames = ct.getNamesOfSeqs();
839 for (int i = 0; i < allNames.size(); i++) {
841 if (rareNames.count(allNames[i]) != 0) { //you are a rare name
843 }else{ //you are a abund name
844 rareAbund = ".abund";
847 vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
848 for (int j = 0; j < thisSeqsGroups.size(); j++) {
849 if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
850 int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
851 vector<int> nums; nums.push_back(num);
852 countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums);
858 for (it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) {
859 string fileroot = outputDir + m->getRootName(m->getSimpleName(countfile));
860 map<string, string> variables;
861 variables["[filename]"] = fileroot;
862 variables["[tag]"] = it3->first;
863 string filename = getOutputFileName("count",variables);
864 outputNames.push_back(filename); outputTypes["count"].push_back(filename);
865 (it3->second)->printTable(filename);
873 catch(exception& e) {
874 m->errorOut(e, "SplitAbundCommand", "parseCount");
878 /**********************************************************************************************************************/
879 int SplitAbundCommand::writeNames() { //namefile
882 map<string, ofstream*> filehandles;
884 if (Groups.size() == 0) {
888 map<string, string> variables;
889 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
890 variables["[tag]"] = "rare";
891 string rare = getOutputFileName("name", variables);
892 m->openOutputFile(rare, rout);
893 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
895 variables["[tag]"] = "abund";
896 string abund = getOutputFileName("name", variables);
897 m->openOutputFile(abund, aout);
898 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
900 if (rareNames.size() != 0) {
901 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
902 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
907 if (abundNames.size() != 0) {
908 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
909 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
914 }else{ //parse names by abundance and group
915 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
918 map<string, ofstream*> filehandles;
919 map<string, ofstream*>::iterator it3;
921 for (int i=0; i<Groups.size(); i++) {
923 filehandles[Groups[i]+".rare"] = temp;
924 temp2 = new ofstream;
925 filehandles[Groups[i]+".abund"] = temp2;
927 map<string, string> variables;
928 variables["[filename]"] = fileroot;
929 variables["[tag]"] = "rare";
930 variables["[group]"] = Groups[i];
931 string rareGroupFileName = getOutputFileName("name",variables);
932 variables["[tag]"] = "abund";
933 string abundGroupFileName = getOutputFileName("name",variables);
934 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
935 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
938 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
939 vector<string> names;
940 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
943 if (rareNames.count(itName->first) != 0) { //you are a rare name
945 }else{ //you are a abund name
946 rareAbund = ".abund";
949 map<string, string> outputStrings;
950 map<string, string>::iterator itout;
951 for (int i = 0; i < names.size(); i++) {
953 string group = groupMap.getGroup(names[i]);
955 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
956 itout = outputStrings.find(group+rareAbund);
957 if (itout == outputStrings.end()) {
958 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
959 }else { outputStrings[group+rareAbund] += "," + names[i]; }
960 }else if(group == "not found") {
961 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
965 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
969 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
970 (*(filehandles[it3->first])).close();
971 map<string, string> variables;
972 variables["[filename]"] = fileroot;
973 variables["[tag]"] = it3->first;
974 outputNames.push_back(getOutputFileName("name",variables)); outputTypes["name"].push_back(getOutputFileName("name",variables));
982 catch(exception& e) {
983 m->errorOut(e, "SplitAbundCommand", "writeNames");
987 /**********************************************************************************************************************/
988 //just write the unique names - if a namesfile is given
989 int SplitAbundCommand::writeAccnos(string tag) {
992 map<string, ofstream*> filehandles;
994 if (Groups.size() == 0) {
998 map<string, string> variables;
999 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
1000 variables["[tag]"] = tag;
1001 variables["[tag2]"] = "rare";
1002 string rare = getOutputFileName("accnos",variables);
1003 m->openOutputFile(rare, rout);
1004 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
1006 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1007 rout << (*itRare) << endl;
1011 variables["[tag2]"] = "abund";
1012 string abund = getOutputFileName("accnos",variables);
1013 m->openOutputFile(abund, aout);
1014 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
1016 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1017 aout << (*itAbund) << endl;
1021 }else{ //parse names by abundance and group
1022 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
1025 map<string, ofstream*> filehandles;
1026 map<string, ofstream*>::iterator it3;
1028 for (int i=0; i<Groups.size(); i++) {
1029 temp = new ofstream;
1030 filehandles[Groups[i]+".rare"] = temp;
1031 temp2 = new ofstream;
1032 filehandles[Groups[i]+".abund"] = temp2;
1034 map<string, string> variables;
1035 variables["[filename]"] = fileroot;
1036 variables["[tag]"] = tag;
1037 variables["[tag2]"] = "rare";
1038 variables["[group]"] = Groups[i];
1039 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".rare"]));
1040 variables["[tag2]"] = "abund";
1041 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".abund"]));
1045 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1046 string group = groupMap.getGroup(*itRare);
1048 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1049 *(filehandles[group+".rare"]) << *itRare << endl;
1054 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1055 string group = groupMap.getGroup(*itAbund);
1057 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1058 *(filehandles[group+".abund"]) << *itAbund << endl;
1063 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1064 (*(filehandles[it3->first])).close();
1065 map<string, string> variables;
1066 variables["[filename]"] = fileroot;
1067 variables["[tag]"] = tag;
1068 variables["[tag2]"] = it3->first;
1069 outputNames.push_back(getOutputFileName("accnos",variables)); outputTypes["accnos"].push_back(getOutputFileName("accnos",variables));
1077 catch(exception& e) {
1078 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
1082 /**********************************************************************************************************************/
1083 int SplitAbundCommand::parseGroup(string tag) { //namefile
1086 map<string, ofstream*> filehandles;
1088 if (Groups.size() == 0) {
1092 map<string, string> variables;
1093 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
1094 variables["[tag]"] = tag;
1095 variables["[tag2]"] = "rare";
1096 string rare = getOutputFileName("group",variables);
1097 m->openOutputFile(rare, rout);
1098 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
1100 variables["[tag2]"] = "abund";
1101 string abund = getOutputFileName("group",variables);
1103 m->openOutputFile(abund, aout);
1104 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
1106 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1107 vector<string> names;
1108 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
1110 for (int i = 0; i < names.size(); i++) {
1112 string group = groupMap.getGroup(names[i]);
1114 if (group == "not found") {
1115 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
1117 if (rareNames.count(itName->first) != 0) { //you are a rare name
1118 rout << names[i] << '\t' << group << endl;
1119 }else{ //you are a abund name
1120 aout << names[i] << '\t' << group << endl;
1129 }else{ //parse names by abundance and group
1130 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
1133 map<string, ofstream*> filehandles;
1134 map<string, ofstream*>::iterator it3;
1136 for (int i=0; i<Groups.size(); i++) {
1137 temp = new ofstream;
1138 filehandles[Groups[i]+".rare"] = temp;
1139 temp2 = new ofstream;
1140 filehandles[Groups[i]+".abund"] = temp2;
1142 map<string, string> variables;
1143 variables["[filename]"] = fileroot;
1144 variables["[tag]"] = tag;
1145 variables["[tag2]"] = "rare";
1146 variables["[group]"] = Groups[i];
1147 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".rare"]));
1148 variables["[tag2]"] = "abund";
1149 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".abund"]));
1152 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1153 vector<string> names;
1154 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
1157 if (rareNames.count(itName->first) != 0) { //you are a rare name
1158 rareAbund = ".rare";
1159 }else{ //you are a abund name
1160 rareAbund = ".abund";
1163 for (int i = 0; i < names.size(); i++) {
1165 string group = groupMap.getGroup(names[i]);
1167 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1168 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
1173 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1174 (*(filehandles[it3->first])).close();
1175 map<string, string> variables;
1176 variables["[filename]"] = fileroot;
1177 variables["[tag]"] = tag;
1178 variables["[tag2]"] = it3->first;
1179 outputNames.push_back(getOutputFileName("group",variables)); outputTypes["group"].push_back(getOutputFileName("group",variables));
1187 catch(exception& e) {
1188 m->errorOut(e, "SplitAbundCommand", "parseGroups");
1192 /**********************************************************************************************************************/
1193 int SplitAbundCommand::parseFasta(string tag) { //namefile
1196 map<string, ofstream*> filehandles;
1198 if (Groups.size() == 0) {
1202 map<string, string> variables;
1203 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
1204 variables["[tag]"] = tag;
1205 variables["[tag2]"] = "rare";
1206 string rare = getOutputFileName("fasta",variables);
1207 m->openOutputFile(rare, rout);
1208 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
1210 variables["[tag2]"] = "abund";
1211 string abund = getOutputFileName("fasta",variables);
1212 m->openOutputFile(abund, aout);
1213 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
1217 m->openInputFile(fastafile, in);
1220 if (m->control_pressed) { break; }
1222 Sequence seq(in); m->gobble(in);
1224 if (seq.getName() != "") {
1226 map<string, string>::iterator itNames;
1228 itNames = nameMap.find(seq.getName());
1230 if (itNames == nameMap.end()) {
1231 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1233 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
1234 seq.printSequence(rout);
1235 }else{ //you are a abund name
1236 seq.printSequence(aout);
1245 }else{ //parse names by abundance and group
1246 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
1249 map<string, ofstream*> filehandles;
1250 map<string, ofstream*>::iterator it3;
1252 for (int i=0; i<Groups.size(); i++) {
1253 temp = new ofstream;
1254 filehandles[Groups[i]+".rare"] = temp;
1255 temp2 = new ofstream;
1256 filehandles[Groups[i]+".abund"] = temp2;
1258 map<string, string> variables;
1259 variables["[filename]"] = fileroot;
1260 variables["[tag]"] = tag;
1261 variables["[tag2]"] = "rare";
1262 variables["[group]"] = Groups[i];
1263 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".rare"]));
1264 variables["[tag2]"] = "abund";
1265 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".abund"]));
1270 m->openInputFile(fastafile, in);
1273 if (m->control_pressed) { break; }
1275 Sequence seq(in); m->gobble(in);
1277 if (seq.getName() != "") {
1278 map<string, string>::iterator itNames = nameMap.find(seq.getName());
1280 if (itNames == nameMap.end()) {
1281 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1283 vector<string> names;
1284 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
1287 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1288 rareAbund = ".rare";
1289 }else{ //you are a abund name
1290 rareAbund = ".abund";
1293 if (countfile == "") {
1294 for (int i = 0; i < names.size(); i++) {
1295 string group = groupMap.getGroup(seq.getName());
1297 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1298 seq.printSequence(*(filehandles[group+rareAbund]));
1299 }else if(group == "not found") {
1300 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1304 vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
1305 for (int i = 0; i < thisSeqsGroups.size(); i++) {
1306 if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
1307 seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
1316 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1317 (*(filehandles[it3->first])).close();
1318 map<string, string> variables;
1319 variables["[filename]"] = fileroot;
1320 variables["[tag]"] = tag;
1321 variables["[tag2]"] = it3->first;
1322 outputNames.push_back(getOutputFileName("fasta",variables)); outputTypes["fasta"].push_back(getOutputFileName("fasta",variables));
1330 catch(exception& e) {
1331 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1335 /**********************************************************************************************************************/