]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added getCommandInfoCommand for gui
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::setParameters(){      
14         try {           
15                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
19                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SplitAbundCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SplitAbundCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
40                 helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
41                 helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
42                 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
43                 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
44                 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
45                 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
46                 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
47                 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
48                 helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
49                 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
50                 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "SplitAbundCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59
60 //**********************************************************************************************************************
61 SplitAbundCommand::SplitAbundCommand(){ 
62         try {
63                 abort = true; calledHelp = true; 
64                 setParameters();
65                 vector<string> tempOutNames;
66                 outputTypes["list"] = tempOutNames;
67                 outputTypes["name"] = tempOutNames;
68                 outputTypes["accnos"] = tempOutNames;
69                 outputTypes["group"] = tempOutNames;
70                 outputTypes["fasta"] = tempOutNames;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
74                 exit(1);
75         }
76 }
77 //**********************************************************************************************************************
78 SplitAbundCommand::SplitAbundCommand(string option)  {
79         try {
80                 abort = false; calledHelp = false;   
81                 allLines = 1;
82                         
83                 //allow user to run help
84                 if(option == "help") { help(); abort = true; calledHelp = true; }
85                 
86                 else {
87                         //valid paramters for this command
88                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
89                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
90                         
91                         OptionParser parser(option);
92                         map<string, string> parameters = parser.getParameters();
93                         
94                         ValidParameters validParameter;
95                         map<string, string>::iterator it;
96                 
97                         //check to make sure all parameters are valid for command
98                         for (it = parameters.begin(); it != parameters.end(); it++) { 
99                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
100                         }
101                         
102                         //initialize outputTypes
103                         vector<string> tempOutNames;
104                         outputTypes["list"] = tempOutNames;
105                         outputTypes["name"] = tempOutNames;
106                         outputTypes["accnos"] = tempOutNames;
107                         outputTypes["group"] = tempOutNames;
108                         outputTypes["fasta"] = tempOutNames;                    
109                                                                                                 
110                         //if the user changes the input directory command factory will send this info to us in the output parameter 
111                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
112                         if (inputDir == "not found"){   inputDir = "";          }
113                         else {
114                                 string path;
115                                 it = parameters.find("list");
116                                 //user has given a template file
117                                 if(it != parameters.end()){ 
118                                         path = m->hasPath(it->second);
119                                         //if the user has not given a path then, add inputdir. else leave path alone.
120                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
121                                 }
122                                 
123                                 it = parameters.find("group");
124                                 //user has given a template file
125                                 if(it != parameters.end()){ 
126                                         path = m->hasPath(it->second);
127                                         //if the user has not given a path then, add inputdir. else leave path alone.
128                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
129                                 }
130                                 
131                                 it = parameters.find("fasta");
132                                 //user has given a template file
133                                 if(it != parameters.end()){ 
134                                         path = m->hasPath(it->second);
135                                         //if the user has not given a path then, add inputdir. else leave path alone.
136                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
137                                 }
138                                 
139                                 it = parameters.find("name");
140                                 //user has given a template file
141                                 if(it != parameters.end()){ 
142                                         path = m->hasPath(it->second);
143                                         //if the user has not given a path then, add inputdir. else leave path alone.
144                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
145                                 }
146
147                         }
148
149                         
150                         //if the user changes the output directory command factory will send this info to us in the output parameter 
151                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
152
153                         //check for required parameters
154                         listfile = validParameter.validFile(parameters, "list", true);
155                         if (listfile == "not open") { abort = true; }
156                         else if (listfile == "not found") { listfile = ""; }
157                         else{ inputFile = listfile; }   
158                         
159                         namefile = validParameter.validFile(parameters, "name", true);
160                         if (namefile == "not open") { abort = true; }
161                         else if (namefile == "not found") { namefile = ""; }    
162                         else{ inputFile = namefile; }   
163                 
164                         fastafile = validParameter.validFile(parameters, "fasta", true);
165                         if (fastafile == "not open") { abort = true; }
166                         else if (fastafile == "not found") {                            
167                                 fastafile = m->getFastaFile(); 
168                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
169                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
170                         }       
171                         
172                         groupfile = validParameter.validFile(parameters, "group", true);
173                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
174                         else if (groupfile == "not found") { groupfile = ""; }
175                         else {  
176                                 groupMap = new GroupMap(groupfile);
177                                 
178                                 int error = groupMap->readMap();
179                                 if (error == 1) { abort = true; }
180         
181                         }
182                         
183                         groups = validParameter.validFile(parameters, "groups", false);         
184                         if (groups == "not found") { groups = ""; }
185                         else if (groups == "all") { 
186                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
187                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
188                         }else { 
189                                 m->splitAtDash(groups, Groups);
190                         }
191                         
192                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
193                         
194                         //do you have all files needed
195                         if ((listfile == "") && (namefile == "")) { 
196                                 namefile = m->getNameFile(); 
197                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
198                                 else {                          
199                                         listfile = m->getListFile(); 
200                                         if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
201                                         else {  m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
202                                 }
203                         }
204                         
205                         //check for optional parameter and set defaults
206                         // ...at some point should added some additional type checking...
207                         label = validParameter.validFile(parameters, "label", false);                   
208                         if (label == "not found") { label = "";  allLines = 1; }
209                         else { 
210                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
211                                 else { allLines = 1;  }
212                         }
213                         
214                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
215                         accnos = m->isTrue(temp); 
216                         
217                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
218                         convert(temp, cutoff); 
219
220                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
221
222                 }
223
224         }
225         catch(exception& e) {
226                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
227                 exit(1);
228         }
229 }
230 //**********************************************************************************************************************
231 SplitAbundCommand::~SplitAbundCommand(){ 
232         if (groupfile != "") {  delete groupMap;  } 
233 }
234 //**********************************************************************************************************************
235 int SplitAbundCommand::execute(){
236         try {
237         
238                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
239                 
240                 if (listfile != "") { //you are using a listfile to determine abundance
241                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
242                         
243                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
244                         set<string> processedLabels;
245                         set<string> userLabels = labels;        
246                         
247                         input = new InputData(listfile, "list");
248                         list = input->getListVector();
249                         string lastLabel = list->getLabel();
250                         
251                         //do you have a namefile or do we need to similate one?
252                         if (namefile != "") {  readNamesFile();         }
253                         else                            { createNameMap(list);  }
254                         
255                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
256                         
257                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
258                         
259                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
260                                 
261                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
262                                                 
263                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
264                                                 splitList(list);
265                                                                                         
266                                                 processedLabels.insert(list->getLabel());
267                                                 userLabels.erase(list->getLabel());
268                                 }
269                                 
270                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
271                                                 string saveLabel = list->getLabel();
272                                                 
273                                                 delete list;
274                                                 list = input->getListVector(lastLabel); //get new list vector to process
275                                                 
276                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
277                                                 splitList(list);
278                                                 
279                                                 processedLabels.insert(list->getLabel());
280                                                 userLabels.erase(list->getLabel());
281                                                 
282                                                 //restore real lastlabel to save below
283                                                 list->setLabel(saveLabel);
284                                 }
285                                 
286                         
287                                 lastLabel = list->getLabel();
288                                         
289                                 delete list;
290                                 list = input->getListVector(); //get new list vector to process
291                         }
292                         
293                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
294                         
295                         //output error messages about any remaining user labels
296                         set<string>::iterator it;
297                         bool needToRun = false;
298                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
299                                 m->mothurOut("Your file does not include the label " + *it); 
300                                 if (processedLabels.count(lastLabel) != 1) {
301                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
302                                         needToRun = true;
303                                 }else {
304                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
305                                 }
306
307                         }
308                         
309                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
310                         
311                         //run last label if you need to
312                         if (needToRun == true)  {
313                                 if (list != NULL) {     delete list;    }
314                                 list = input->getListVector(lastLabel); //get new list vector to process
315                                 
316                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
317                                 splitList(list);                
318                                 
319                                 delete list;
320                         }
321                         
322                         delete input;
323                         
324                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
325                                                                         
326                 }else { //you are using the namefile to determine abundance
327                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
328                         
329                         splitNames(); 
330                         writeNames();
331                         
332                         string tag = "";
333                         if (groupfile != "")                            {  parseGroup(tag);             }
334                         if (accnos)                                                     {  writeAccnos(tag);    }
335                         if (fastafile != "")                            {  parseFasta(tag);             }
336                 }
337                 
338                 //set fasta file as new current fastafile
339                 string current = "";
340                 itTypes = outputTypes.find("fasta");
341                 if (itTypes != outputTypes.end()) {
342                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
343                 }
344                 
345                 itTypes = outputTypes.find("name");
346                 if (itTypes != outputTypes.end()) {
347                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
348                 }
349                 
350                 itTypes = outputTypes.find("group");
351                 if (itTypes != outputTypes.end()) {
352                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
353                 }
354                 
355                 itTypes = outputTypes.find("list");
356                 if (itTypes != outputTypes.end()) {
357                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
358                 }
359                 
360                 itTypes = outputTypes.find("accnos");
361                 if (itTypes != outputTypes.end()) {
362                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
363                 }
364                 
365                 m->mothurOutEndLine();
366                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
367                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
368                 m->mothurOutEndLine();
369                 
370                 return 0;
371         }
372         catch(exception& e) {
373                 m->errorOut(e, "SplitAbundCommand", "execute");
374                 exit(1);
375         }
376 }
377 /**********************************************************************************************************************/
378 int SplitAbundCommand::splitList(ListVector* thisList) {
379         try {
380                 rareNames.clear();
381                 abundNames.clear();
382                 
383                 //get rareNames and abundNames
384                 for (int i = 0; i < thisList->getNumBins(); i++) {
385                         if (m->control_pressed) { return 0; }
386                         
387                         string bin = thisList->get(i);
388                                                 
389                         vector<string> names;
390                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
391                         int size = names.size();
392                                 
393                         if (size <= cutoff) {
394                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
395                         }else{
396                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
397                         }
398                 }//end for
399
400                 
401                 string tag = thisList->getLabel() + ".";
402                 
403                 writeList(thisList, tag);
404                 
405                 if (groupfile != "")                            {  parseGroup(tag);             }
406                 if (accnos)                                                     {  writeAccnos(tag);    }
407                 if (fastafile != "")                            {  parseFasta(tag);             }
408                 
409                 return 0;
410
411         }
412         catch(exception& e) {
413                 m->errorOut(e, "SplitAbundCommand", "splitList");
414                 exit(1);
415         }
416 }
417 /**********************************************************************************************************************/
418 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
419         try {
420                 
421                 map<string, ofstream*> filehandles;
422                 
423                 if (Groups.size() == 0) {
424                         SAbundVector* sabund = new SAbundVector();
425                         *sabund = thisList->getSAbundVector();
426                 
427                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
428                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
429                         int numRareBins = 0;
430                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
431                                 if (i > cutoff) { break; }
432                                 numRareBins += sabund->get(i);
433                         }
434                         int numAbundBins = thisList->getNumBins() - numRareBins;
435                         delete sabund;
436
437                         ofstream aout;
438                         ofstream rout;
439                         
440                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
441                         m->openOutputFile(rare, rout);
442                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
443                         
444                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
445                         m->openOutputFile(abund, aout);
446                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
447
448                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
449                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
450
451                         for (int i = 0; i < thisList->getNumBins(); i++) {
452                                 if (m->control_pressed) { break; }
453                         
454                                 string bin = list->get(i); 
455                         
456                                 int size = m->getNumNames(bin);
457                         
458                                 if (size <= cutoff) {  rout << bin << '\t';  }
459                                 else                            {  aout << bin << '\t'; }
460                         }
461                         
462                         if (rareNames.size() != 0)      { rout << endl; }
463                         if (abundNames.size() != 0) { aout << endl; }
464                         
465                         rout.close();
466                         aout.close();
467                         
468                 }else{ //parse names by abundance and group
469                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
470                         ofstream* temp;
471                         ofstream* temp2;
472                         //map<string, bool> wroteFile;
473                         map<string, ofstream*> filehandles;
474                         map<string, ofstream*>::iterator it3;
475
476                         for (int i=0; i<Groups.size(); i++) {
477                                 temp = new ofstream;
478                                 filehandles[Groups[i]+".rare"] = temp;
479                                 temp2 = new ofstream;
480                                 filehandles[Groups[i]+".abund"] = temp2;
481                                 
482                                 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
483                                 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
484                                 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
485                                 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
486                         }
487                         
488                         map<string, string> groupVector;
489                         map<string, string>::iterator itGroup;
490                         map<string, int> groupNumBins;
491                 
492                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
493                                 groupNumBins[it3->first] = 0;
494                                 groupVector[it3->first] = "";
495                         }
496                 
497                         for (int i = 0; i < thisList->getNumBins(); i++) {
498                                 if (m->control_pressed) { break; }
499                         
500                                 map<string, string> groupBins;
501                                 string bin = list->get(i); 
502                         
503                                 vector<string> names;
504                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
505                         
506                                 //parse bin into list of sequences in each group
507                                 for (int j = 0; j < names.size(); j++) {
508                                         string rareAbund;
509                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
510                                                 rareAbund = ".rare";
511                                         }else{ //you are a abund name
512                                                 rareAbund = ".abund";
513                                         }
514                                         
515                                         string group = groupMap->getGroup(names[j]);
516                                 
517                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
518                                                 itGroup = groupBins.find(group+rareAbund);
519                                                 if(itGroup == groupBins.end()) {
520                                                         groupBins[group+rareAbund] = names[j];  //add first name
521                                                         groupNumBins[group+rareAbund]++;
522                                                 }else{ //add another name
523                                                         groupBins[group+rareAbund] +=  "," + names[j];
524                                                 }
525                                         }else if(group == "not found") {
526                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
527                                         }
528                                 }
529                         
530                         
531                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
532                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
533                                 }
534                         }
535                         
536                         //end list vector
537                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
538                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
539                                 (*(filehandles[it3->first])).close();
540                                 delete it3->second;
541                         }
542                 }
543                 
544                 return 0;
545
546         }
547         catch(exception& e) {
548                 m->errorOut(e, "SplitAbundCommand", "writeList");
549                 exit(1);
550         }
551 }
552 /**********************************************************************************************************************/
553 int SplitAbundCommand::splitNames() { //namefile
554         try {
555                 
556                 rareNames.clear();
557                 abundNames.clear();     
558                         
559                 //open input file
560                 ifstream in;
561                 m->openInputFile(namefile, in);
562                 
563                 while (!in.eof()) {
564                         if (m->control_pressed) { break; }
565                         
566                         string firstCol, secondCol;
567                         in >> firstCol >> secondCol; m->gobble(in);
568                         
569                         nameMap[firstCol] = secondCol;
570                         
571                         int size = m->getNumNames(secondCol);
572                                 
573                         if (size <= cutoff) {
574                                 rareNames.insert(firstCol); 
575                         }else{
576                                 abundNames.insert(firstCol); 
577                         }
578                 }
579                 in.close();
580                                 
581                 return 0;
582
583         }
584         catch(exception& e) {
585                 m->errorOut(e, "SplitAbundCommand", "splitNames");
586                 exit(1);
587         }
588 }
589 /**********************************************************************************************************************/
590 int SplitAbundCommand::readNamesFile() { 
591         try {
592                 //open input file
593                 ifstream in;
594                 m->openInputFile(namefile, in);
595                 
596                 while (!in.eof()) {
597                         if (m->control_pressed) { break; }
598                         
599                         string firstCol, secondCol;
600                         in >> firstCol >> secondCol; m->gobble(in);
601                         
602                         nameMap[firstCol] = secondCol;
603                 }
604                 in.close();
605                                 
606                 return 0;
607
608         }
609         catch(exception& e) {
610                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
611                 exit(1);
612         }
613 }
614 /**********************************************************************************************************************/
615 int SplitAbundCommand::createNameMap(ListVector* thisList) {
616         try {
617                 
618                 if (thisList != NULL) {
619                         for (int i = 0; i < thisList->getNumBins(); i++) {
620                                 if (m->control_pressed) { return 0; }
621                                 
622                                 string bin = thisList->get(i);
623                                                         
624                                 vector<string> names;
625                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
626                                 
627                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
628                         }//end for
629                 }
630                 
631                 return 0;
632         }
633         catch(exception& e) {
634                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
635                 exit(1);
636         }
637 }
638 /**********************************************************************************************************************/
639 int SplitAbundCommand::writeNames() { //namefile
640         try {
641                 
642                 map<string, ofstream*> filehandles;
643
644                 if (Groups.size() == 0) {
645                         ofstream aout;
646                         ofstream rout;
647                         
648                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
649                         m->openOutputFile(rare, rout);
650                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
651                         
652                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
653                         m->openOutputFile(abund, aout);
654                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
655                         
656                         if (rareNames.size() != 0) {
657                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
658                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
659                                 }
660                         }
661                         rout.close();
662                         
663                         if (abundNames.size() != 0) {
664                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
665                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
666                                 }
667                         }
668                         aout.close();
669                         
670                 }else{ //parse names by abundance and group
671                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
672                         ofstream* temp;
673                         ofstream* temp2;
674                         map<string, ofstream*> filehandles;
675                         map<string, ofstream*>::iterator it3;
676
677                         for (int i=0; i<Groups.size(); i++) {
678                                 temp = new ofstream;
679                                 filehandles[Groups[i]+".rare"] = temp;
680                                 temp2 = new ofstream;
681                                 filehandles[Groups[i]+".abund"] = temp2;
682                                 
683                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
684                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
685                         }
686                         
687                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
688                                 vector<string> names;
689                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
690                                 
691                                 string rareAbund;
692                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
693                                                 rareAbund = ".rare";
694                                 }else{ //you are a abund name
695                                                 rareAbund = ".abund";
696                                 }
697                                 
698                                 map<string, string> outputStrings;
699                                 map<string, string>::iterator itout;
700                                 for (int i = 0; i < names.size(); i++) {
701                                         
702                                         string group = groupMap->getGroup(names[i]);
703                                         
704                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
705                                                 itout = outputStrings.find(group+rareAbund);
706                                                 if (itout == outputStrings.end()) {  
707                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
708                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
709                                         }else if(group == "not found") {
710                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
711                                         }
712                                 }
713                                 
714                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
715                         }
716                         
717                         
718                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
719                                 (*(filehandles[it3->first])).close();
720                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
721                                 delete it3->second;
722                         }
723                 }
724                                 
725                 return 0;
726
727         }
728         catch(exception& e) {
729                 m->errorOut(e, "SplitAbundCommand", "writeNames");
730                 exit(1);
731         }
732 }
733 /**********************************************************************************************************************/
734 //just write the unique names - if a namesfile is given
735 int SplitAbundCommand::writeAccnos(string tag) { 
736         try {
737                 
738                 map<string, ofstream*> filehandles;
739                 
740                 if (Groups.size() == 0) {
741                         ofstream aout;
742                         ofstream rout;
743                         
744                         
745                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
746                         m->openOutputFile(rare, rout);
747                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
748                         
749                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
750                                 rout << (*itRare) << endl;
751                         }
752                         rout.close();
753                 
754                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
755                         m->openOutputFile(abund, aout);
756                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
757                         
758                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
759                                 aout << (*itAbund) << endl;
760                         }
761                         aout.close();
762                         
763                 }else{ //parse names by abundance and group
764                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
765                         ofstream* temp;
766                         ofstream* temp2;
767                         map<string, ofstream*> filehandles;
768                         map<string, ofstream*>::iterator it3;
769                         
770                         for (int i=0; i<Groups.size(); i++) {
771                                 temp = new ofstream;
772                                 filehandles[Groups[i]+".rare"] = temp;
773                                 temp2 = new ofstream;
774                                 filehandles[Groups[i]+".abund"] = temp2;
775                                 
776                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
777                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
778                         }
779                         
780                         //write rare
781                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
782                                         string group = groupMap->getGroup(*itRare);
783                                         
784                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
785                                                 *(filehandles[group+".rare"]) << *itRare << endl;
786                                         }
787                         }
788                                 
789                         //write abund   
790                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
791                                         string group = groupMap->getGroup(*itAbund);
792                                         
793                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
794                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
795                                         }
796                         }
797                         
798                         //close files
799                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
800                                 (*(filehandles[it3->first])).close();
801                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
802                                 delete it3->second;
803                         }
804                 }
805                                 
806                 return 0;
807
808         }
809         catch(exception& e) {
810                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
811                 exit(1);
812         }
813 }
814 /**********************************************************************************************************************/
815 int SplitAbundCommand::parseGroup(string tag) { //namefile
816         try {
817                 
818                 map<string, ofstream*> filehandles;
819         
820                 if (Groups.size() == 0) {
821                         ofstream aout;
822                         ofstream rout;
823                         
824                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
825                         m->openOutputFile(rare, rout);
826                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
827                 
828                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
829                         m->openOutputFile(abund, aout);
830                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
831                         
832                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
833                                 vector<string> names;
834                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
835                                 
836                                 for (int i = 0; i < names.size(); i++) {
837                                 
838                                         string group = groupMap->getGroup(names[i]);
839                                 
840                                         if (group == "not found") { 
841                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
842                                         }else {
843                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
844                                                         rout << names[i] << '\t' << group << endl;
845                                                 }else{ //you are a abund name
846                                                         aout << names[i] << '\t' << group << endl;
847                                                 }
848                                         }
849                                 }
850                         }
851                         
852                         rout.close(); 
853                         aout.close(); 
854
855                 }else{ //parse names by abundance and group
856                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
857                         ofstream* temp;
858                         ofstream* temp2;
859                         map<string, ofstream*> filehandles;
860                         map<string, ofstream*>::iterator it3;
861
862                         for (int i=0; i<Groups.size(); i++) {
863                                 temp = new ofstream;
864                                 filehandles[Groups[i]+".rare"] = temp;
865                                 temp2 = new ofstream;
866                                 filehandles[Groups[i]+".abund"] = temp2;
867                                 
868                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
869                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
870                         }
871                         
872                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
873                                 vector<string> names;
874                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
875                                 
876                                 string rareAbund;
877                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
878                                         rareAbund = ".rare";
879                                 }else{ //you are a abund name
880                                         rareAbund = ".abund";
881                                 }
882                                 
883                                 for (int i = 0; i < names.size(); i++) {
884                                 
885                                         string group = groupMap->getGroup(names[i]);
886                                                                         
887                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
888                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
889                                         }
890                                 }
891                         }
892                         
893                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
894                                 (*(filehandles[it3->first])).close();
895                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
896                                 delete it3->second;
897                         }
898                 }
899                                 
900                 return 0;
901
902         }
903         catch(exception& e) {
904                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
905                 exit(1);
906         }
907 }
908 /**********************************************************************************************************************/
909 int SplitAbundCommand::parseFasta(string tag) { //namefile
910         try {
911                 
912                 map<string, ofstream*> filehandles;
913                 
914                 if (Groups.size() == 0) {
915                         ofstream aout;
916                         ofstream rout;
917                         
918                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
919                         m->openOutputFile(rare, rout);
920                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
921                 
922                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
923                         m->openOutputFile(abund, aout);
924                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
925                 
926                         //open input file
927                         ifstream in;
928                         m->openInputFile(fastafile, in);
929         
930                         while (!in.eof()) {
931                                 if (m->control_pressed) { break; }
932                 
933                                 Sequence seq(in); m->gobble(in);
934                                 
935                                 if (seq.getName() != "") { 
936                                         
937                                         map<string, string>::iterator itNames;
938                                         
939                                         itNames = nameMap.find(seq.getName());
940                                         
941                                         if (itNames == nameMap.end()) {
942                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
943                                         }else{
944                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
945                                                         seq.printSequence(rout);
946                                                 }else{ //you are a abund name
947                                                         seq.printSequence(aout);
948                                                 }
949                                         }
950                                 }
951                         }
952                         in.close();
953                         rout.close(); 
954                         aout.close(); 
955
956                 }else{ //parse names by abundance and group
957                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
958                         ofstream* temp;
959                         ofstream* temp2;
960                         map<string, ofstream*> filehandles;
961                         map<string, ofstream*>::iterator it3;
962
963                         for (int i=0; i<Groups.size(); i++) {
964                                 temp = new ofstream;
965                                 filehandles[Groups[i]+".rare"] = temp;
966                                 temp2 = new ofstream;
967                                 filehandles[Groups[i]+".abund"] = temp2;
968                                 
969                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
970                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
971                         }
972                         
973                         //open input file
974                         ifstream in;
975                         m->openInputFile(fastafile, in);
976         
977                         while (!in.eof()) {
978                                 if (m->control_pressed) { break; }
979                 
980                                 Sequence seq(in); m->gobble(in);
981                                 
982                                 if (seq.getName() != "") { 
983                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
984                                         
985                                         if (itNames == nameMap.end()) {
986                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
987                                         }else{
988                                                 vector<string> names;
989                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
990                                 
991                                                 string rareAbund;
992                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
993                                                         rareAbund = ".rare";
994                                                 }else{ //you are a abund name
995                                                         rareAbund = ".abund";
996                                                 }
997                                 
998                                                 for (int i = 0; i < names.size(); i++) {
999                                 
1000                                                         string group = groupMap->getGroup(seq.getName());
1001                                         
1002                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1003                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
1004                                                         }else if(group == "not found") {
1005                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1006                                                         }
1007                                                 }
1008                                         }
1009                                 }
1010                         }
1011                         in.close();
1012                         
1013                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1014                                 (*(filehandles[it3->first])).close();
1015                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1016                                 delete it3->second;
1017                         }
1018                 }
1019                                 
1020                 return 0;
1021
1022         }
1023         catch(exception& e) {
1024                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1025                 exit(1);
1026         }
1027 }
1028 /**********************************************************************************************************************/
1029