2 * splitabundcommand.cpp
5 * Created by westcott on 5/17/10.
6 * Copyright 2010 Schloss Lab. All rights reserved.
10 #include "splitabundcommand.h"
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option) {
16 wroteRareList = false;
17 wroteAbundList = false;
20 //allow user to run help
21 if(option == "help") { help(); abort = true; }
24 //valid paramters for this command
25 string Array[] = {"list","name","group","label","accnos","cutoff","outputdir","inputdir"};
26 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
28 OptionParser parser(option);
29 map<string, string> parameters = parser.getParameters();
31 ValidParameters validParameter;
32 map<string, string>::iterator it;
34 //check to make sure all parameters are valid for command
35 for (it = parameters.begin(); it != parameters.end(); it++) {
36 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
39 //if the user changes the input directory command factory will send this info to us in the output parameter
40 string inputDir = validParameter.validFile(parameters, "inputdir", false);
41 if (inputDir == "not found"){ inputDir = ""; }
44 it = parameters.find("list");
45 //user has given a template file
46 if(it != parameters.end()){
47 path = hasPath(it->second);
48 //if the user has not given a path then, add inputdir. else leave path alone.
49 if (path == "") { parameters["list"] = inputDir + it->second; }
52 it = parameters.find("group");
53 //user has given a template file
54 if(it != parameters.end()){
55 path = hasPath(it->second);
56 //if the user has not given a path then, add inputdir. else leave path alone.
57 if (path == "") { parameters["group"] = inputDir + it->second; }
60 it = parameters.find("name");
61 //user has given a template file
62 if(it != parameters.end()){
63 path = hasPath(it->second);
64 //if the user has not given a path then, add inputdir. else leave path alone.
65 if (path == "") { parameters["name"] = inputDir + it->second; }
71 //if the user changes the output directory command factory will send this info to us in the output parameter
72 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = ""; }
74 //check for required parameters
75 listfile = validParameter.validFile(parameters, "list", true);
76 if (listfile == "not open") { abort = true; }
77 else if (listfile == "not found") { listfile = ""; }
79 //check for required parameters
80 namefile = validParameter.validFile(parameters, "name", true);
81 if (namefile == "not open") { abort = true; }
82 else if (namefile == "not found") { namefile = ""; }
84 groupfile = validParameter.validFile(parameters, "group", true);
85 if (groupfile == "not open") { abort = true; }
86 else if (groupfile == "not found") { groupfile = ""; }
88 groupMap = new GroupMap(groupfile);
90 int error = groupMap->readMap();
91 if (error == 1) { abort = true; }
94 //do you have all files needed
95 if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
96 if ((listfile != "") && (namefile != "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command, but NOT BOTH. "); m->mothurOutEndLine(); abort = true; }
98 //check for optional parameter and set defaults
99 // ...at some point should added some additional type checking...
100 label = validParameter.validFile(parameters, "label", false);
101 if (label == "not found") { label = ""; allLines = 1; }
103 if(label != "all") { splitAtDash(label, labels); allLines = 0; }
104 else { allLines = 1; }
107 string temp = validParameter.validFile(parameters, "accnos", false); if (temp == "not found") { temp = "F"; }
108 accnos = isTrue(temp);
110 temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found") { temp = "0"; }
111 convert(temp, cutoff);
113 if (cutoff == 0) { m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true; }
118 catch(exception& e) {
119 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
123 //**********************************************************************************************************************
124 void SplitAbundCommand::help(){
126 m->mothurOut("The split.abund command reads a list or a names file splits the sequences into rare and abundant groups.. \n");
127 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label and accnos.\n");
128 m->mothurOut("The list or name parameter is required, and you must provide a cutoff value.\n");
129 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
130 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
131 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
132 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
133 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
134 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
135 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
138 catch(exception& e) {
139 m->errorOut(e, "SplitAbundCommand", "help");
143 //**********************************************************************************************************************
144 SplitAbundCommand::~SplitAbundCommand(){
145 if (groupfile != "") { delete groupMap; }
147 //**********************************************************************************************************************
148 int SplitAbundCommand::execute(){
151 if (abort == true) { return 0; }
153 if (namefile != "") { split(); }
156 //remove old files so you can append later....
157 string fileroot = outputDir + getRootName(getSimpleName(listfile));
158 remove((fileroot + "rare.list").c_str());
159 remove((fileroot + "abund.list").c_str());
161 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
162 set<string> processedLabels;
163 set<string> userLabels = labels;
165 input = new InputData(listfile, "list");
166 list = input->getListVector();
167 string lastLabel = list->getLabel();
169 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
171 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
173 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
175 if(allLines == 1 || labels.count(list->getLabel()) == 1){
177 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
180 processedLabels.insert(list->getLabel());
181 userLabels.erase(list->getLabel());
184 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
185 string saveLabel = list->getLabel();
188 list = input->getListVector(lastLabel); //get new list vector to process
190 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
193 processedLabels.insert(list->getLabel());
194 userLabels.erase(list->getLabel());
196 //restore real lastlabel to save below
197 list->setLabel(saveLabel);
201 lastLabel = list->getLabel();
204 list = input->getListVector(); //get new list vector to process
207 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
209 //output error messages about any remaining user labels
210 set<string>::iterator it;
211 bool needToRun = false;
212 for (it = userLabels.begin(); it != userLabels.end(); it++) {
213 m->mothurOut("Your file does not include the label " + *it);
214 if (processedLabels.count(lastLabel) != 1) {
215 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
218 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
223 if (m->control_pressed) { delete input; for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
225 //run last label if you need to
226 if (needToRun == true) {
227 if (list != NULL) { delete list; }
228 list = input->getListVector(lastLabel); //get new list vector to process
230 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
238 if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
240 if (wroteAbundList) { outputNames.push_back(fileroot + "abund.list"); }
241 if (wroteRareList) { outputNames.push_back(fileroot + "rare.list"); }
245 m->mothurOutEndLine();
246 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
247 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
248 m->mothurOutEndLine();
252 catch(exception& e) {
253 m->errorOut(e, "SplitAbundCommand", "execute");
257 /**********************************************************************************************************************/
258 int SplitAbundCommand::split(ListVector* thisList) {
261 SAbundVector* sabund = new SAbundVector();
262 *sabund = thisList->getSAbundVector();
264 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
265 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
267 for (int i = 0; i <= sabund->getMaxRank(); i++) {
268 if (i > cutoff) { break; }
269 numRareBins += sabund->get(i);
271 int numAbundBins = thisList->getNumBins() - numRareBins;
275 ofstream outListAbund;
276 ofstream outListRare;
277 ofstream outGroupRare;
278 ofstream outGroupAbund;
279 ofstream outAccnosRare;
280 ofstream outAccnosAbund;
282 string fileroot = outputDir + getRootName(getSimpleName(listfile));
283 if (numRareBins > 0) {
284 wroteRareList = true;
285 string listRareName = fileroot + "rare.list";
286 openOutputFileAppend(listRareName, outListRare);
287 outListRare << thisList->getLabel() << '\t' << numRareBins << '\t';
290 string accnosName = fileroot + thisList->getLabel() + ".rare.accnos";
291 openOutputFile(accnosName, outAccnosRare);
292 outputNames.push_back(accnosName);
295 if (groupfile != "") {
296 string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group";
297 openOutputFile(groupFileName, outGroupRare);
298 outputNames.push_back(groupFileName);
302 if (numAbundBins > 0) {
303 wroteAbundList = true;
304 string listAbundName = fileroot + "abund.list";
305 openOutputFileAppend(listAbundName, outListAbund);
306 outListAbund << thisList->getLabel() << '\t' << numAbundBins << '\t';
309 string accnosName = fileroot + thisList->getLabel() + ".abund.accnos";
310 openOutputFile(accnosName, outAccnosAbund);
311 outputNames.push_back(accnosName);
314 if (groupfile != "") {
315 string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".abund.group";
316 openOutputFile(groupFileName, outGroupAbund);
317 outputNames.push_back(groupFileName);
321 for (int i = 0; i < thisList->getNumBins(); i++) {
322 if (m->control_pressed) { break; }
324 string bin = list->get(i);
326 int size = getNumNames(bin);
328 if (size <= cutoff) { outListRare << bin << '\t'; }
329 else { outListAbund << bin << '\t'; }
331 if ((groupfile != "") || (accnos)) { //you need to parse the bin...
332 vector<string> names;
333 splitAtComma(bin, names); //parses bin into individual sequence names
335 //parse bin into list of sequences in each group
336 for (int j = 0; j < names.size(); j++) {
338 //write to accnos file
340 if (size <= cutoff) { outAccnosRare << names[j] << endl; }
341 else { outAccnosAbund << names[j] << endl; }
345 if (groupfile != "") {
346 string group = groupMap->getGroup(names[j]);
348 if (group == "not found") { //error in groupfile so close and remove output file and disregard groupfile
349 m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine();
351 if (numAbundBins > 0) {
352 outGroupAbund.close();
353 remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".abund.group").c_str());
355 if (numRareBins > 0) {
356 outGroupRare.close();
357 remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group").c_str());
361 if (size <= cutoff) { outGroupRare << names[j] << '\t' << group << endl; }
362 else { outGroupAbund << names[j] << '\t' << group << endl; }
372 if (numRareBins > 0) {
375 if (accnos) { outAccnosRare.close(); }
376 if (groupfile != "") { outGroupRare.close(); }
379 if (numAbundBins > 0) {
380 outListAbund << endl;
381 outListAbund.close();
382 if (accnos) { outAccnosAbund.close(); }
383 if (groupfile != "") { outGroupAbund.close(); }
389 catch(exception& e) {
390 m->errorOut(e, "SplitAbundCommand", "split");
394 /**********************************************************************************************************************/
395 int SplitAbundCommand::split() { //namefile
398 ofstream outNameAbund;
399 ofstream outNameRare;
400 ofstream outGroupRare;
401 ofstream outGroupAbund;
402 ofstream outAccnosRare;
403 ofstream outAccnosAbund;
405 bool wroteNameAbund = false;
406 bool wroteNameRare = false;
407 bool wroteGroupRare = false;
408 bool wroteGroupAbund = false;
409 bool wroteAccnosRare = false;
410 bool wroteAccnosAbund = false;
412 //prepare output files
413 string fileroot = outputDir + getRootName(getSimpleName(namefile));
415 string nameRareName = fileroot + "rare.names";
416 openOutputFile(nameRareName, outNameRare);
417 string nameAbundName = fileroot + "abund.names";
418 openOutputFile(nameAbundName, outNameAbund);
421 string accnosName = fileroot + "rare.accnos";
422 openOutputFile(accnosName, outAccnosRare);
424 accnosName = fileroot + "abund.accnos";
425 openOutputFile(accnosName, outAccnosAbund);
428 if (groupfile != "") {
429 string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group";
430 openOutputFile(groupFileName, outGroupRare);
432 groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group";
433 openOutputFile(groupFileName, outGroupAbund);
439 openInputFile(namefile, in);
442 if (m->control_pressed) { break; }
444 string firstCol, secondCol;
445 in >> firstCol >> secondCol; gobble(in);
447 int size = getNumNames(secondCol);
449 if (size <= cutoff) { outNameRare << firstCol << '\t' << secondCol << endl; wroteNameRare = true; }
450 else { outNameAbund << firstCol << '\t' << secondCol << endl; wroteNameAbund = true; }
453 if ((groupfile != "") || (accnos)) { //you need to parse the bin...
454 vector<string> names;
455 splitAtComma(secondCol, names); //parses bin into individual sequence names
457 //parse bin into list of sequences in each group
458 for (int j = 0; j < names.size(); j++) {
460 //write to accnos file
462 if (size <= cutoff) { outAccnosRare << names[j] << endl; wroteAccnosRare = true; }
463 else { outAccnosAbund << names[j] << endl; wroteAccnosAbund = true; }
467 if (groupfile != "") {
468 string group = groupMap->getGroup(names[j]);
470 if (group == "not found") { //error in groupfile so close and remove output file and disregard groupfile
471 m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine();
474 outGroupAbund.close();
475 remove((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group").c_str());
476 outGroupRare.close();
477 remove((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group").c_str());
480 wroteGroupRare = false;
481 wroteGroupAbund = false;
483 if (size <= cutoff) { outGroupRare << names[j] << '\t' << group << endl; wroteGroupRare = true; }
484 else { outGroupAbund << names[j] << '\t' << group << endl; wroteGroupAbund = true; }
496 outNameAbund.close();
497 if (!wroteNameRare) { remove((fileroot + "rare.names").c_str()); }
498 else { outputNames.push_back((fileroot + "rare.names")); }
499 if (!wroteNameAbund) { remove((fileroot + "abund.names").c_str()); }
500 else { outputNames.push_back((fileroot + "abund.names")); }
502 if (groupfile != "") {
503 outGroupRare.close(); outGroupAbund.close();
504 if (!wroteGroupRare) { remove((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group").c_str()); }
505 else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group")); }
506 if (!wroteGroupAbund) { remove((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group").c_str()); }
507 else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile)) + ".abund.group")); }
511 outAccnosAbund.close(); outAccnosRare.close();
512 if (!wroteAccnosRare) { remove((fileroot + "rare.accnos").c_str()); }
513 else { outputNames.push_back((fileroot + "rare.accnos")); }
514 if (!wroteAccnosAbund) { remove((fileroot + "abund.accnos").c_str()); }
515 else { outputNames.push_back((fileroot + "abund.accnos")); }
521 catch(exception& e) {
522 m->errorOut(e, "SplitAbundCommand", "split");
527 /**********************************************************************************************************************/