2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
11 #include "sharedutilities.h"
13 //**********************************************************************************************************************
// Registers every parameter the split.abund command accepts (fasta, name, count, group,
// list, label, cutoff, groups, accnos, inputdir, outputdir) by appending a CommandParameter
// to `parameters`, then gathers the parameter names into myArray.
// Exceptions are reported through m->errorOut with this class/method name.
14 vector<string> SplitAbundCommand::setParameters(){
// parameters declared with the "InputTypes" type: fasta, name, count, group, list
16 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
17 CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none",false,false); parameters.push_back(pname);
18 CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none",false,false); parameters.push_back(pcount);
19 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none",false,false); parameters.push_back(pgroup);
20 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
// remaining parameters are "String", "Number" or "Boolean" typed options
// NOTE(review): the 4th argument ("0" for cutoff, "F" for accnos) presumably is the default value - confirm against CommandParameter
21 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
23 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
24 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
25 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
26 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
// collect just the names of everything registered above
28 vector<string> myArray;
29 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
33 m->errorOut(e, "SplitAbundCommand", "setParameters");
37 //**********************************************************************************************************************
// Assembles the user-facing help text for split.abund in helpString, one sentence per
// appended line: purpose, parameter list, required parameters, per-parameter notes,
// the command format and an example.
// Exceptions are reported through m->errorOut with this class/method name.
38 string SplitAbundCommand::getHelpString(){
40 string helpString = "";
41 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
42 helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
43 helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
44 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
45 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
46 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
47 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
48 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group. \n";
49 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files. \n";
50 helpString += "If you want .abund and .rare files for all groups, set groups=all. \n";
51 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
52 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
53 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
57 m->errorOut(e, "SplitAbundCommand", "getHelpString");
61 //**********************************************************************************************************************
// Returns the file-name tag (extension) used for an output of the given type:
//   fasta -> "fasta", list -> "list", name -> "names", count -> "count_table",
//   group -> "groups", accnos -> "accnos".
// type      : output type to look up.
// inputName : not used by any of the lines shown here.
// A type that is not registered in outputTypes is reported with an [ERROR] message; a type
// with no tag defined below is also reported, sets m->control_pressed, and yields "".
62 string SplitAbundCommand::getOutputFileNameTag(string type, string inputName=""){
64 string outputFileName = "";
65 map<string, vector<string> >::iterator it;
67 //is this a type this command creates
68 it = outputTypes.find(type);
69 if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
// translate the type into its file-name tag
71 if (type == "fasta") { outputFileName = "fasta"; }
72 else if (type == "list") { outputFileName = "list"; }
73 else if (type == "name") { outputFileName = "names"; }
74 else if (type == "count") { outputFileName = "count_table"; }
75 else if (type == "group") { outputFileName = "groups"; }
76 else if (type == "accnos") { outputFileName = "accnos"; }
77 else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; }
79 return outputFileName;
82 m->errorOut(e, "SplitAbundCommand", "getOutputFileNameTag");
86 //**********************************************************************************************************************
// Default constructor: marks the command as aborted with help called (abort and calledHelp
// are both set to true) and registers an empty file list in outputTypes for each output
// type this command can produce (list, name, count, accnos, group, fasta).
87 SplitAbundCommand::SplitAbundCommand(){
89 abort = true; calledHelp = true;
// one empty vector is reused to seed every output type
91 vector<string> tempOutNames;
92 outputTypes["list"] = tempOutNames;
93 outputTypes["name"] = tempOutNames;
94 outputTypes["count"] = tempOutNames;
95 outputTypes["accnos"] = tempOutNames;
96 outputTypes["group"] = tempOutNames;
97 outputTypes["fasta"] = tempOutNames;
100 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
104 //**********************************************************************************************************************
// Constructs the command from the user's option string and validates it.
//  - "help" / "citation" print their text and set abort and calledHelp.
//  - Otherwise each supplied parameter is checked against setParameters(); an invalid one sets abort.
//  - inputdir is prepended to the list, group, fasta, name and count values that carry no path.
//  - list, name, fasta, group and count files are validated; a file that cannot be opened sets abort.
//    fasta falls back to the current fasta file (m->getFastaFile()) and aborts if there is none.
//  - name+count and group+count are rejected as mutually exclusive.
//  - groups is ignored (with a message) when there is no group/count file or the count file has no group info.
//  - When no list, name or count file was given, the current name, list and count files are tried.
//  - label, accnos and cutoff are read; a cutoff of 0 (also the value used when cutoff is absent) sets abort.
// option : the raw option string passed to OptionParser.
105 SplitAbundCommand::SplitAbundCommand(string option) {
107 abort = false; calledHelp = false;
110 //allow user to run help
111 if(option == "help") { help(); abort = true; calledHelp = true; }
112 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
114 vector<string> myArray = setParameters();
116 OptionParser parser(option);
117 map<string, string> parameters = parser.getParameters();
119 ValidParameters validParameter;
120 map<string, string>::iterator it;
122 //check to make sure all parameters are valid for command
123 for (it = parameters.begin(); it != parameters.end(); it++) {
124 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
127 //initialize outputTypes
128 vector<string> tempOutNames;
129 outputTypes["list"] = tempOutNames;
130 outputTypes["name"] = tempOutNames;
131 outputTypes["accnos"] = tempOutNames;
132 outputTypes["group"] = tempOutNames;
133 outputTypes["fasta"] = tempOutNames;
134 outputTypes["count"] = tempOutNames;
136 //if the user changes the input directory command factory will send this info to us in the output parameter
137 string inputDir = validParameter.validFile(parameters, "inputdir", false);
138 if (inputDir == "not found"){ inputDir = ""; }
141 it = parameters.find("list");
142 //user has given a list file
143 if(it != parameters.end()){
144 path = m->hasPath(it->second);
145 //if the user has not given a path then, add inputdir. else leave path alone.
146 if (path == "") { parameters["list"] = inputDir + it->second; }
149 it = parameters.find("group");
150 //user has given a group file
151 if(it != parameters.end()){
152 path = m->hasPath(it->second);
153 //if the user has not given a path then, add inputdir. else leave path alone.
154 if (path == "") { parameters["group"] = inputDir + it->second; }
157 it = parameters.find("fasta");
158 //user has given a fasta file
159 if(it != parameters.end()){
160 path = m->hasPath(it->second);
161 //if the user has not given a path then, add inputdir. else leave path alone.
162 if (path == "") { parameters["fasta"] = inputDir + it->second; }
165 it = parameters.find("name");
166 //user has given a name file
167 if(it != parameters.end()){
168 path = m->hasPath(it->second);
169 //if the user has not given a path then, add inputdir. else leave path alone.
170 if (path == "") { parameters["name"] = inputDir + it->second; }
173 it = parameters.find("count");
174 //user has given a count file
175 if(it != parameters.end()){
176 path = m->hasPath(it->second);
177 //if the user has not given a path then, add inputdir. else leave path alone.
178 if (path == "") { parameters["count"] = inputDir + it->second; }
183 //if the user changes the output directory command factory will send this info to us in the output parameter
184 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
186 //check for required parameters
// validFile yields "not open" / "not found" as sentinel strings; anything else is a usable file name
// inputFile remembers the list file or name file supplied by the user
187 listfile = validParameter.validFile(parameters, "list", true);
188 if (listfile == "not open") { abort = true; }
189 else if (listfile == "not found") { listfile = ""; }
190 else{ inputFile = listfile; m->setListFile(listfile); }
192 namefile = validParameter.validFile(parameters, "name", true);
193 if (namefile == "not open") { abort = true; }
194 else if (namefile == "not found") { namefile = ""; }
195 else{ inputFile = namefile; m->setNameFile(namefile); }
197 fastafile = validParameter.validFile(parameters, "fasta", true);
198 if (fastafile == "not open") { abort = true; }
199 else if (fastafile == "not found") {
//no fasta given - fall back to the current fasta file, which is required
200 fastafile = m->getFastaFile();
201 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
202 else { m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
203 }else { m->setFastaFile(fastafile); }
205 groupfile = validParameter.validFile(parameters, "group", true);
206 if (groupfile == "not open") { groupfile = ""; abort = true; }
207 else if (groupfile == "not found") { groupfile = ""; }
//load the group assignments; readMap returning 1 signals a read error
209 int error = groupMap.readMap(groupfile);
210 if (error == 1) { abort = true; }
211 m->setGroupFile(groupfile);
214 countfile = validParameter.validFile(parameters, "count", true);
215 if (countfile == "not open") { countfile = ""; abort = true; }
216 else if (countfile == "not found") { countfile = ""; }
//record the count file as current and load it into ct
218 m->setCountTableFile(countfile);
219 ct.readTable(countfile);
//name/count and group/count are mutually exclusive inputs
222 if ((namefile != "") && (countfile != "")) {
223 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
226 if ((groupfile != "") && (countfile != "")) {
227 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
//groups is a dash-separated list that is split into Groups
230 groups = validParameter.validFile(parameters, "groups", false);
231 if (groups == "not found") { groups = ""; }
232 else { m->splitAtDash(groups, Groups); }
234 if (((groupfile == "") && (countfile == ""))&& (groups != "")) { m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
236 if (countfile != "") {
237 if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = ""; Groups.clear(); }
240 //do you have all files needed
241 if ((listfile == "") && (namefile == "") && (countfile == "")) {
//nothing supplied - try the current name, list and count files
242 namefile = m->getNameFile();
243 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
245 listfile = m->getListFile();
246 if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
248 countfile = m->getCountTableFile();
249 if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
250 else { m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
255 //check for optional parameter and set defaults
256 // ...at some point should add some additional type checking...
//label: absent or "all" means every label (allLines = 1); otherwise a dash-separated list split into labels
257 label = validParameter.validFile(parameters, "label", false);
258 if (label == "not found") { label = ""; allLines = 1; }
260 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
261 else { allLines = 1; }
264 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
265 accnos = m->isTrue(temp);
267 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
268 m->mothurConvert(temp, cutoff);
//a cutoff of 0 is treated as "no cutoff provided"
270 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
274 catch(exception& e) {
275 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
279 //**********************************************************************************************************************
// Destructor: empty body, performs no explicit cleanup.
280 SplitAbundCommand::~SplitAbundCommand(){}
281 //**********************************************************************************************************************
// Runs the split.abund command.
// Returns 0 when the command was aborted after help was called, 2 when aborted otherwise,
// and 0 when interrupted through m->control_pressed (after removing the files in outputNames).
// With a list file: walks the list vectors read from it, handling the labels the user asked
// for (or all labels when allLines == 1), including the "use the next lowest label" fallback
// and a final run on lastLabel when needToRun is set.
// With a name file (and in the remaining branch): the group/accnos/fasta outputs are written
// through parseGroup, writeAccnos and parseFasta.
// Afterwards the first output of each type becomes the current fasta/name/group/list/accnos/count
// file and all output file names are printed.
282 int SplitAbundCommand::execute(){
285 if (abort == true) { if (calledHelp) { return 0; } return 2; }
//restrict the user's group selection against the groups known to the count table or group map
287 if (Groups.size() != 0) {
288 vector<string> allGroups;
289 if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
290 else { allGroups = groupMap.getNamesOfGroups(); }
292 util.setGroups(Groups, allGroups);
295 if (listfile != "") { //you are using a listfile to determine abundance
//default the output directory to the list file's path
296 if (outputDir == "") { outputDir = m->hasPath(listfile); }
298 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
299 set<string> processedLabels;
300 set<string> userLabels = labels;
302 InputData input(listfile, "list");
303 ListVector* list = input.getListVector();
304 string lastLabel = list->getLabel();
306 //do you have a namefile or do we need to simulate one?
307 if (namefile != "") { readNamesFile(); }
308 else { createNameMap(list); }
310 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//keep reading list vectors while there are vectors left and labels still wanted
312 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
314 if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
//this label was requested (or every label is)
316 if(allLines == 1 || labels.count(list->getLabel()) == 1){
318 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
321 processedLabels.insert(list->getLabel());
322 userLabels.erase(list->getLabel());
//a requested label was skipped over and lastLabel was not processed - go back to lastLabel
325 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
326 string saveLabel = list->getLabel();
329 list = input.getListVector(lastLabel); //get new list vector to process
331 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
334 processedLabels.insert(list->getLabel());
335 userLabels.erase(list->getLabel());
337 //restore real lastlabel to save below
338 list->setLabel(saveLabel);
342 lastLabel = list->getLabel();
345 list = input.getListVector(); //get new list vector to process
348 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
350 //output error messages about any remaining user labels
351 set<string>::iterator it;
352 bool needToRun = false;
353 for (it = userLabels.begin(); it != userLabels.end(); it++) {
354 m->mothurOut("Your file does not include the label " + *it);
355 if (processedLabels.count(lastLabel) != 1) {
356 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
359 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
364 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
366 //run last label if you need to
367 if (needToRun == true) {
368 if (list != NULL) { delete list; }
369 list = input.getListVector(lastLabel); //get new list vector to process
371 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
377 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
379 }else if (namefile != "") { //you are using the namefile to determine abundance
//default the output directory to the name file's path
380 if (outputDir == "") { outputDir = m->hasPath(namefile); }
386 if (groupfile != "") { parseGroup(tag); }
387 if (accnos) { writeAccnos(tag); }
388 if (fastafile != "") { parseFasta(tag); }
394 if (accnos) { writeAccnos(tag); }
395 if (fastafile != "") { parseFasta(tag); }
//make the first output file of each type the current file of that type
398 //set fasta file as new current fastafile
400 itTypes = outputTypes.find("fasta");
401 if (itTypes != outputTypes.end()) {
402 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
405 itTypes = outputTypes.find("name");
406 if (itTypes != outputTypes.end()) {
407 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
410 itTypes = outputTypes.find("group");
411 if (itTypes != outputTypes.end()) {
412 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
415 itTypes = outputTypes.find("list");
416 if (itTypes != outputTypes.end()) {
417 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
420 itTypes = outputTypes.find("accnos");
421 if (itTypes != outputTypes.end()) {
422 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
425 itTypes = outputTypes.find("count");
426 if (itTypes != outputTypes.end()) {
427 if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
//report every file that was written
430 m->mothurOutEndLine();
431 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
432 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
433 m->mothurOutEndLine();
437 catch(exception& e) {
438 m->errorOut(e, "SplitAbundCommand", "execute");
442 /**********************************************************************************************************************/
// Classifies the sequences of one list vector as rare or abundant and writes the outputs.
// thisList : the list vector to process; each bin is a comma-separated string of sequence names.
// A bin's size is compared with cutoff: names of bins with size <= cutoff are inserted into
// rareNames, and the abundNames insert that follows is presumably the else branch.
// The outputs are then written with tag = "<label>." via writeList and, depending on the inputs
// given, parseGroup, writeAccnos, parseFasta and parseCount.
// Returns 0 when interrupted through m->control_pressed.
443 int SplitAbundCommand::splitList(ListVector* thisList) {
448 //get rareNames and abundNames
450 for (int i = 0; i < thisList->getNumBins(); i++) {
451 if (m->control_pressed) { return 0; }
453 string bin = thisList->get(i);
455 vector<string> names;
456 m->splitAtComma(bin, names); //parses bin into individual sequence names
457 int size = names.size();
459 //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
460 if (countfile != "") {
//add up the count-table abundance of every name in the bin
462 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
465 if (size <= cutoff) {
467 for (int j = 0; j < names.size(); j++) { rareNames.insert(names[j]); }
469 for (int j = 0; j < names.size(); j++) { abundNames.insert(names[j]); }
//file names for this list vector carry its label as a tag
474 string tag = thisList->getLabel() + ".";
476 writeList(thisList, tag, numRareBins);
478 if (groupfile != "") { parseGroup(tag); }
479 if (accnos) { writeAccnos(tag); }
480 if (fastafile != "") { parseFasta(tag); }
481 if (countfile != "") { parseCount(tag); }
486 catch(exception& e) {
487 m->errorOut(e, "SplitAbundCommand", "splitList");
491 /**********************************************************************************************************************/
// Writes the rare and abundant list files for one list vector.
// thisList    : the list vector whose bins are written.
// tag         : text inserted into the output file names (the caller passes "<label>.").
// numRareBins : number of rare bins; the abundant count is getNumBins() - numRareBins.
// Without Groups: one "<root><tag>rare.list" and one "<root><tag>abund.list" file. Each gets a
// "label <tab> numBins <tab>" header only when rareNames / abundNames is non-empty, followed by
// the bins routed by their size against cutoff.
// With Groups: one rare and one abund file per group. Each bin's names are split by group
// (from groupMap, or from the count table when a count file is in use) and every file receives
// a single "label <tab> numBins <tab> bins" line.
// Every file created is recorded in outputNames and outputTypes["list"].
492 int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) {
495 map<string, ofstream*> filehandles;
497 if (Groups.size() == 0) {
498 int numAbundBins = thisList->getNumBins() - numRareBins;
503 string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare." + getOutputFileNameTag("list");
504 m->openOutputFile(rare, rout);
505 outputNames.push_back(rare); outputTypes["list"].push_back(rare);
507 string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund." + getOutputFileNameTag("list");
508 m->openOutputFile(abund, aout);
509 outputNames.push_back(abund); outputTypes["list"].push_back(abund);
//header: label and number of bins
511 if (rareNames.size() != 0) { rout << thisList->getLabel() << '\t' << numRareBins << '\t'; }
512 if (abundNames.size() != 0) { aout << thisList->getLabel() << '\t' << numAbundBins << '\t'; }
514 for (int i = 0; i < thisList->getNumBins(); i++) {
515 if (m->control_pressed) { break; }
517 string bin = thisList->get(i);
518 vector<string> names;
519 m->splitAtComma(bin, names);
521 int size = names.size();
522 if (countfile != "") {
//add up the count-table abundance of every name in the bin
524 for (int j = 0; j < names.size(); j++) { size += ct.getNumSeqs(names[j]); }
//route the whole bin to the rare or abund file
527 if (size <= cutoff) { rout << bin << '\t'; }
528 else { aout << bin << '\t'; }
531 if (rareNames.size() != 0) { rout << endl; }
532 if (abundNames.size() != 0) { aout << endl; }
537 }else{ //parse names by abundance and group
538 string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
541 //map<string, bool> wroteFile;
542 map<string, ofstream*> filehandles;
543 map<string, ofstream*>::iterator it3;
//open "<group>.rare" and "<group>.abund" streams for every selected group
545 for (int i=0; i<Groups.size(); i++) {
547 filehandles[Groups[i]+".rare"] = temp;
548 temp2 = new ofstream;
549 filehandles[Groups[i]+".abund"] = temp2;
551 string rareGroupFileName = fileroot + Groups[i] +"."+ tag + "rare." + getOutputFileNameTag("list");
552 string abundGroupFileName = fileroot + Groups[i] +"."+ tag + "abund." + getOutputFileNameTag("list");
553 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
554 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
555 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
556 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
//groupVector accumulates the bins text per file key, groupNumBins the number of bins per file key
559 map<string, string> groupVector;
560 map<string, string>::iterator itGroup;
561 map<string, int> groupNumBins;
563 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
564 groupNumBins[it3->first] = 0;
565 groupVector[it3->first] = "";
568 for (int i = 0; i < thisList->getNumBins(); i++) {
569 if (m->control_pressed) { break; }
//groupBins holds this bin's names, keyed by "<group>.rare" / "<group>.abund"
571 map<string, string> groupBins;
572 string bin = thisList->get(i);
574 vector<string> names;
575 m->splitAtComma(bin, names); //parses bin into individual sequence names
577 //parse bin into list of sequences in each group
578 for (int j = 0; j < names.size(); j++) {
580 if (rareNames.count(names[j]) != 0) { //you are a rare name
582 }else{ //you are a abund name
583 rareAbund = ".abund";
//no count file: the group comes from the group map
586 if (countfile == "") {
587 string group = groupMap.getGroup(names[j]);
589 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
590 itGroup = groupBins.find(group+rareAbund);
591 if(itGroup == groupBins.end()) {
592 groupBins[group+rareAbund] = names[j]; //add first name
593 groupNumBins[group+rareAbund]++;
594 }else{ //add another name
595 groupBins[group+rareAbund] += "," + names[j];
597 }else if(group == "not found") {
598 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
//count file in use: a sequence can belong to several groups
601 vector<string> thisSeqsGroups = ct.getGroups(names[j]);
602 for (int k = 0; k < thisSeqsGroups.size(); k++) {
603 if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
604 itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
605 if(itGroup == groupBins.end()) {
606 groupBins[thisSeqsGroups[k]+rareAbund] = names[j]; //add first name
607 groupNumBins[thisSeqsGroups[k]+rareAbund]++;
608 }else{ //add another name
609 groupBins[thisSeqsGroups[k]+rareAbund] += "," + names[j];
//append this bin's per-group pieces to the running output text
617 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
618 groupVector[itGroup->first] += itGroup->second + '\t';
//write one line per file and close it
623 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
624 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl; // label numBins listvector for that group
625 (*(filehandles[it3->first])).close();
633 catch(exception& e) {
634 m->errorOut(e, "SplitAbundCommand", "writeList");
638 /**********************************************************************************************************************/
// Classifies every sequence in the count table by abundance.
// For each name from ct.getNamesOfSeqs(): nameMap maps the name to itself, and the name is
// inserted into rareNames when ct.getNumSeqs(name) <= cutoff; the abundNames insert that
// follows is presumably the else branch.
// Returns 0 when interrupted through m->control_pressed.
639 int SplitAbundCommand::splitCount() { //countfile
644 vector<string> allNames = ct.getNamesOfSeqs();
645 for (int i = 0; i < allNames.size(); i++) {
647 if (m->control_pressed) { return 0; }
649 int size = ct.getNumSeqs(allNames[i]);
650 nameMap[allNames[i]] = allNames[i];
652 if (size <= cutoff) {
653 rareNames.insert(allNames[i]);
655 abundNames.insert(allNames[i]);
659 //write out split count files
664 catch(exception& e) {
665 m->errorOut(e, "SplitAbundCommand", "splitCount");
669 /**********************************************************************************************************************/
// Reads the name file and classifies every entry by abundance.
// Each record is two whitespace-separated columns; nameMap maps the first column to the second.
// The entry's size is m->getNumNames(secondCol): the first column is inserted into rareNames
// when size <= cutoff; the abundNames insert that follows is presumably the else branch.
// Reading stops early when m->control_pressed is set.
670 int SplitAbundCommand::splitNames() { //namefile
678 m->openInputFile(namefile, in);
681 if (m->control_pressed) { break; }
683 string firstCol, secondCol;
684 in >> firstCol >> secondCol; m->gobble(in);
686 nameMap[firstCol] = secondCol;
688 int size = m->getNumNames(secondCol);
690 if (size <= cutoff) {
691 rareNames.insert(firstCol);
693 abundNames.insert(firstCol);
701 catch(exception& e) {
702 m->errorOut(e, "SplitAbundCommand", "splitNames");
706 /**********************************************************************************************************************/
// Loads the name file into nameMap without classifying anything: each record is two
// whitespace-separated columns and nameMap maps the first column to the second.
// Reading stops early when m->control_pressed is set.
707 int SplitAbundCommand::readNamesFile() {
711 m->openInputFile(namefile, in);
714 if (m->control_pressed) { break; }
716 string firstCol, secondCol;
717 in >> firstCol >> secondCol; m->gobble(in);
719 nameMap[firstCol] = secondCol;
726 catch(exception& e) {
727 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
731 /**********************************************************************************************************************/
// Builds a stand-in nameMap from a list vector when no name file was given: every sequence
// name found in any bin is mapped to itself.
// thisList : list vector to scan; nothing is done when it is NULL.
// Returns 0 when interrupted through m->control_pressed.
732 int SplitAbundCommand::createNameMap(ListVector* thisList) {
735 if (thisList != NULL) {
736 for (int i = 0; i < thisList->getNumBins(); i++) {
737 if (m->control_pressed) { return 0; }
739 string bin = thisList->get(i);
741 vector<string> names;
742 m->splitAtComma(bin, names); //parses bin into individual sequence names
744 for (int j = 0; j < names.size(); j++) { nameMap[names[j]] = names[j]; }
750 catch(exception& e) {
751 m->errorOut(e, "SplitAbundCommand", "createNameMap");
755 /**********************************************************************************************************************/
// Writes the rare and abundant count tables.
// tag : text inserted into the output file names in the no-groups case.
// Without Groups: builds rareTable and abundTable with the same groups as ct (when ct has group
// info), copies each rare / abundant sequence's counts into its table, removes groups whose
// total is 0, and prints the table. A table is only printed when its name set is non-empty.
// With Groups: one CountTable per "<group>.rare" / "<group>.abund" key; each sequence's count
// for every selected group it belongs to is added to the matching table, and each table is
// printed to "<root><key>.count_table".
// Every file name is recorded in outputNames and outputTypes["count"].
756 int SplitAbundCommand::parseCount(string tag) { //countfile
759 map<string, ofstream*> filehandles;
761 if (Groups.size() == 0) {
762 string rare = outputDir + m->getRootName(m->getSimpleName(countfile)) + tag + "rare." + getOutputFileNameTag("count");
763 outputNames.push_back(rare); outputTypes["count"].push_back(rare);
765 string abund = outputDir + m->getRootName(m->getSimpleName(countfile)) + tag + "abund." + getOutputFileNameTag("count");
766 outputNames.push_back(abund); outputTypes["count"].push_back(abund);
768 CountTable rareTable;
769 CountTable abundTable;
//give both tables the same groups as the source table
770 if (ct.hasGroupInfo()) {
771 vector<string> ctGroups = ct.getNamesOfGroups();
772 for (int i = 0; i < ctGroups.size(); i++) { rareTable.addGroup(ctGroups[i]); abundTable.addGroup(ctGroups[i]); }
775 if (rareNames.size() != 0) {
776 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
//copy per-group counts when available, otherwise the total count
777 if (ct.hasGroupInfo()) {
778 vector<int> groupCounts = ct.getGroupCounts(*itRare);
779 rareTable.push_back(*itRare, groupCounts);
781 int groupCounts = ct.getNumSeqs(*itRare);
782 rareTable.push_back(*itRare, groupCounts);
//drop groups that ended up with no sequences
785 if (rareTable.hasGroupInfo()) {
786 vector<string> ctGroups = rareTable.getNamesOfGroups();
787 for (int i = 0; i < ctGroups.size(); i++) {
788 if (rareTable.getGroupCount(ctGroups[i]) == 0) { rareTable.removeGroup(ctGroups[i]); }
791 rareTable.printTable(rare);
795 if (abundNames.size() != 0) {
796 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
//copy per-group counts when available, otherwise the total count
797 if (ct.hasGroupInfo()) {
798 vector<int> groupCounts = ct.getGroupCounts(*itAbund);
799 abundTable.push_back(*itAbund, groupCounts);
801 int groupCounts = ct.getNumSeqs(*itAbund);
802 abundTable.push_back(*itAbund, groupCounts);
//drop groups that ended up with no sequences
805 if (abundTable.hasGroupInfo()) {
806 vector<string> ctGroups = abundTable.getNamesOfGroups();
807 for (int i = 0; i < ctGroups.size(); i++) {
808 if (abundTable.getGroupCount(ctGroups[i]) == 0) { abundTable.removeGroup(ctGroups[i]); }
811 abundTable.printTable(abund);
814 }else{ //parse names by abundance and group
815 map<string, CountTable*> countTableMap;
816 map<string, CountTable*>::iterator it3;
//one single-group table for the rare and one for the abund sequences of every selected group
818 for (int i=0; i<Groups.size(); i++) {
819 CountTable* rareCt = new CountTable();
820 rareCt->addGroup(Groups[i]);
821 countTableMap[Groups[i]+".rare"] = rareCt;
822 CountTable* abundCt = new CountTable();
823 abundCt->addGroup(Groups[i]);
824 countTableMap[Groups[i]+".abund"] = abundCt;
827 vector<string> allNames = ct.getNamesOfSeqs();
828 for (int i = 0; i < allNames.size(); i++) {
830 if (rareNames.count(allNames[i]) != 0) { //you are a rare name
832 }else{ //you are a abund name
833 rareAbund = ".abund";
836 vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
837 for (int j = 0; j < thisSeqsGroups.size(); j++) {
838 if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
//the per-group table has a single group, so it takes a one-element count vector
839 int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
840 vector<int> nums; nums.push_back(num);
841 countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums);
//print every per-group table
847 for (it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) {
848 string fileroot = outputDir + m->getRootName(m->getSimpleName(countfile));
849 string filename = fileroot + it3->first + "." + getOutputFileNameTag("count");
850 outputNames.push_back(filename); outputTypes["count"].push_back(filename);
851 (it3->second)->printTable(filename);
859 catch(exception& e) {
860 m->errorOut(e, "SplitAbundCommand", "parseCount");
864 /**********************************************************************************************************************/
// Writes the rare and abundant name files from nameMap.
// Without Groups: "<root>rare.names" gets one "name <tab> nameMap[name]" line per entry of
// rareNames and "<root>abund.names" one per entry of abundNames.
// With Groups: a rare and an abund file are opened per group. For every nameMap entry the
// second column is split at commas, each name is looked up in groupMap, and names in a selected
// group are collected into one line per "<group>.rare" / "<group>.abund" key (first name,
// tab, then the comma-separated names). Names reported as "not found" are skipped with a message.
// File names are recorded in outputNames and outputTypes["name"].
865 int SplitAbundCommand::writeNames() { //namefile
868 map<string, ofstream*> filehandles;
870 if (Groups.size() == 0) {
874 string rare = outputDir + m->getRootName(m->getSimpleName(namefile)) + "rare." + getOutputFileNameTag("name");
875 m->openOutputFile(rare, rout);
876 outputNames.push_back(rare); outputTypes["name"].push_back(rare);
878 string abund = outputDir + m->getRootName(m->getSimpleName(namefile)) + "abund." + getOutputFileNameTag("name");
879 m->openOutputFile(abund, aout);
880 outputNames.push_back(abund); outputTypes["name"].push_back(abund);
882 if (rareNames.size() != 0) {
883 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
884 rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
889 if (abundNames.size() != 0) {
890 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
891 aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
896 }else{ //parse names by abundance and group
897 string fileroot = outputDir + m->getRootName(m->getSimpleName(namefile));
900 map<string, ofstream*> filehandles;
901 map<string, ofstream*>::iterator it3;
//open "<group>.rare" and "<group>.abund" streams for every selected group
903 for (int i=0; i<Groups.size(); i++) {
905 filehandles[Groups[i]+".rare"] = temp;
906 temp2 = new ofstream;
907 filehandles[Groups[i]+".abund"] = temp2;
909 string rareGroupFileName = fileroot + Groups[i] + ".rare." + getOutputFileNameTag("name");
910 string abundGroupFileName = fileroot + Groups[i] + ".abund." + getOutputFileNameTag("name");
911 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
912 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
915 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
916 vector<string> names;
917 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
//rare/abund is decided by the entry's first column
920 if (rareNames.count(itName->first) != 0) { //you are a rare name
922 }else{ //you are a abund name
923 rareAbund = ".abund";
//outputStrings collects one output line per file key for this entry
926 map<string, string> outputStrings;
927 map<string, string>::iterator itout;
928 for (int i = 0; i < names.size(); i++) {
930 string group = groupMap.getGroup(names[i]);
932 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
933 itout = outputStrings.find(group+rareAbund);
934 if (itout == outputStrings.end()) {
//first name seen for this group starts the line as both columns
935 outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
936 }else { outputStrings[group+rareAbund] += "," + names[i]; }
937 }else if(group == "not found") {
938 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
942 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl; }
//close every stream and record its file name
946 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
947 (*(filehandles[it3->first])).close();
948 outputNames.push_back(fileroot + it3->first + "." + getOutputFileNameTag("name")); outputTypes["name"].push_back(fileroot + it3->first + "." + getOutputFileNameTag("name"));
956 catch(exception& e) {
957 m->errorOut(e, "SplitAbundCommand", "writeNames");
961 /**********************************************************************************************************************/
962 //just write the unique names - if a namesfile is given
// Writes accnos files listing the sequence names held in rareNames and abundNames,
// one name per line.
//   tag - text inserted into every output file name, directly after the root name
//         derived from inputFile.
// Every file opened here is recorded in outputNames and outputTypes["accnos"].
963 int SplitAbundCommand::writeAccnos(string tag) {
966 map<string, ofstream*> filehandles;
// No groups selected: split on abundance only, producing one rare and one abund file.
968 if (Groups.size() == 0) {
973 string rare = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "rare." + getOutputFileNameTag("accnos");
974 m->openOutputFile(rare, rout);
975 outputNames.push_back(rare); outputTypes["accnos"].push_back(rare);
977 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
978 rout << (*itRare) << endl;
982 string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag + "abund." + getOutputFileNameTag("accnos");
983 m->openOutputFile(abund, aout);
984 outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
986 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
987 aout << (*itAbund) << endl;
991 }else{ //parse names by abundance and group
992 string fileroot = outputDir + m->getRootName(m->getSimpleName(inputFile));
// NOTE(review): this redeclares filehandles inside the branch; the function-level map
// declared at the top does not appear to be used - confirm it can be removed.
995 map<string, ofstream*> filehandles;
996 map<string, ofstream*>::iterator it3;
// One stream per (group, rare/abund) pair, keyed by "<group>.rare" / "<group>.abund".
998 for (int i=0; i<Groups.size(); i++) {
1000 filehandles[Groups[i]+".rare"] = temp;
1001 temp2 = new ofstream;
1002 filehandles[Groups[i]+".abund"] = temp2;
// File names are fileroot + tag + map key + "." + the accnos file-name tag.
1004 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".rare"]));
1005 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".abund"]));
// Route each rare name to the ".rare" file of the group groupMap reports for it.
1009 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1010 string group = groupMap.getGroup(*itRare);
1012 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1013 *(filehandles[group+".rare"]) << *itRare << endl;
// Same routing for the abundant names, into the ".abund" files.
1018 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1019 string group = groupMap.getGroup(*itAbund);
1021 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1022 *(filehandles[group+".abund"]) << *itAbund << endl;
// Close every per-group stream and register its file name, rebuilt from the map key.
1027 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1028 (*(filehandles[it3->first])).close();
1029 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos")); outputTypes["accnos"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos"));
// Any std::exception is handed to errorOut together with the class and function name.
1037 catch(exception& e) {
1038 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
1042 /**********************************************************************************************************************/
// Writes group files ("name<TAB>group" per line) split by abundance, using the bins in
// nameMap and the group assignments in groupMap.
//   tag - text inserted into every output file name, directly after the root name
//         derived from groupfile.
// Every file opened here is recorded in outputNames and outputTypes["group"].
1043 int SplitAbundCommand::parseGroup(string tag) { //namefile
1046 map<string, ofstream*> filehandles;
// No groups selected: split on abundance only, producing one rare and one abund file.
1048 if (Groups.size() == 0) {
1052 string rare = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "rare." + getOutputFileNameTag("group");
1053 m->openOutputFile(rare, rout);
1054 outputNames.push_back(rare); outputTypes["group"].push_back(rare);
1056 string abund = outputDir + m->getRootName(m->getSimpleName(groupfile)) + tag + "abund." + getOutputFileNameTag("group");
1058 m->openOutputFile(abund, aout);
1059 outputNames.push_back(abund); outputTypes["group"].push_back(abund);
// Each nameMap entry maps a key to a comma-separated list of sequence names.
1061 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1062 vector<string> names;
1063 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
1065 for (int i = 0; i < names.size(); i++) {
1067 string group = groupMap.getGroup(names[i]);
// getGroup returning "not found" is reported to the user.
1069 if (group == "not found") {
1070 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
// Rare versus abundant is decided by the bin's key (itName->first), not by the individual name.
1072 if (rareNames.count(itName->first) != 0) { //you are a rare name
1073 rout << names[i] << '\t' << group << endl;
1074 }else{ //you are a abund name
1075 aout << names[i] << '\t' << group << endl;
1084 }else{ //parse names by abundance and group
1085 string fileroot = outputDir + m->getRootName(m->getSimpleName(groupfile));
// NOTE(review): this redeclares filehandles inside the branch; the function-level map
// declared at the top does not appear to be used - confirm it can be removed.
1088 map<string, ofstream*> filehandles;
1089 map<string, ofstream*>::iterator it3;
// One stream per (group, rare/abund) pair, keyed by "<group>.rare" / "<group>.abund".
1091 for (int i=0; i<Groups.size(); i++) {
1092 temp = new ofstream;
1093 filehandles[Groups[i]+".rare"] = temp;
1094 temp2 = new ofstream;
1095 filehandles[Groups[i]+".abund"] = temp2;
1097 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".rare"]));
1098 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".abund"]));
1101 for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {
1102 vector<string> names;
1103 m->splitAtComma(itName->second, names); //parses bin into individual sequence names
// rareAbund becomes the key suffix that selects the rare or abund stream for this bin.
1106 if (rareNames.count(itName->first) != 0) { //you are a rare name
1107 rareAbund = ".rare";
1108 }else{ //you are a abund name
1109 rareAbund = ".abund";
// Each member of the bin is written to the file of its own group.
1112 for (int i = 0; i < names.size(); i++) {
1114 string group = groupMap.getGroup(names[i]);
1116 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1117 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
// Close every per-group stream and register its file name, rebuilt from the map key.
1122 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1123 (*(filehandles[it3->first])).close();
1124 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group")); outputTypes["group"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group"));
1132 catch(exception& e) {
// NOTE(review): the label passed here is "parseGroups" although the function is named
// parseGroup - confirm whether the reported name should match.
1133 m->errorOut(e, "SplitAbundCommand", "parseGroups");
1137 /**********************************************************************************************************************/
// Reads fastafile and writes each named sequence to a rare or abund fasta file,
// depending on whether it is listed in rareNames.
//   tag - text inserted into every output file name, directly after the root name
//         derived from fastafile.
// Every file opened here is recorded in outputNames and outputTypes["fasta"].
1138 int SplitAbundCommand::parseFasta(string tag) { //namefile
1141 map<string, ofstream*> filehandles;
// No groups selected: split on abundance only, producing one rare and one abund file.
1143 if (Groups.size() == 0) {
1147 string rare = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "rare." + getOutputFileNameTag("fasta");
1148 m->openOutputFile(rare, rout);
1149 outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
1151 string abund = outputDir + m->getRootName(m->getSimpleName(fastafile)) + tag + "abund." + getOutputFileNameTag("fasta");
1152 m->openOutputFile(abund, aout);
1153 outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
1157 m->openInputFile(fastafile, in);
// Leave the read loop as soon as m->control_pressed is set.
1160 if (m->control_pressed) { break; }
// Build the next Sequence from the stream; gobble presumably skips whitespace before
// the following record - confirm against its definition.
1162 Sequence seq(in); m->gobble(in);
// Sequences with an empty name are not processed.
1164 if (seq.getName() != "") {
1166 map<string, string>::iterator itNames;
1168 itNames = nameMap.find(seq.getName());
// A sequence missing from nameMap is reported to the user.
1170 if (itNames == nameMap.end()) {
1171 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1173 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
1174 seq.printSequence(rout);
1175 }else{ //you are a abund name
1176 seq.printSequence(aout);
1185 }else{ //parse names by abundance and group
1186 string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile));
// NOTE(review): this redeclares filehandles inside the branch; the function-level map
// declared at the top does not appear to be used - confirm it can be removed.
1189 map<string, ofstream*> filehandles;
1190 map<string, ofstream*>::iterator it3;
// One stream per (group, rare/abund) pair, keyed by "<group>.rare" / "<group>.abund".
1192 for (int i=0; i<Groups.size(); i++) {
1193 temp = new ofstream;
1194 filehandles[Groups[i]+".rare"] = temp;
1195 temp2 = new ofstream;
1196 filehandles[Groups[i]+".abund"] = temp2;
1198 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".rare"]));
1199 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".abund"]));
1204 m->openInputFile(fastafile, in);
// Leave the read loop as soon as m->control_pressed is set.
1207 if (m->control_pressed) { break; }
1209 Sequence seq(in); m->gobble(in);
1211 if (seq.getName() != "") {
1212 map<string, string>::iterator itNames = nameMap.find(seq.getName());
1214 if (itNames == nameMap.end()) {
1215 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1217 vector<string> names;
1218 m->splitAtComma(itNames->second, names); //parses bin into individual sequence names
// rareAbund becomes the key suffix that selects the rare or abund stream for this sequence.
1221 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1222 rareAbund = ".rare";
1223 }else{ //you are a abund name
1224 rareAbund = ".abund";
// Without a count file the group comes from groupMap.
1227 if (countfile == "") {
// NOTE(review): the loop runs once per entry in names, but the lookup uses
// seq.getName() rather than names[i], so every iteration resolves the same group and a
// bin with several names prints this sequence to the same file repeatedly - confirm
// whether names[i] (or a single lookup) was intended.
1228 for (int i = 0; i < names.size(); i++) {
1229 string group = groupMap.getGroup(seq.getName());
1231 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1232 seq.printSequence(*(filehandles[group+rareAbund]));
1233 }else if(group == "not found") {
1234 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
// Presumably the count-file path: the groups come from ct, and the sequence is printed
// once for each of its groups that is in Groups.
1238 vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
1239 for (int i = 0; i < thisSeqsGroups.size(); i++) {
1240 if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
1241 seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
// Close every per-group stream and register its file name, rebuilt from the map key.
1250 for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
1251 (*(filehandles[it3->first])).close();
1252 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta")); outputTypes["fasta"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta"));
// Any std::exception is handed to errorOut together with the class and function name.
1260 catch(exception& e) {
1261 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1265 /**********************************************************************************************************************/