]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
changed random forest output filename
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> SplitAbundCommand::setParameters(){      
15         try {           
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24                 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(paccnos);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "SplitAbundCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string SplitAbundCommand::getHelpString(){      
39         try {
40                 string helpString = "";
41                 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
42                 helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
43                 helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
44                 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
45                 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
46                 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
47                 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
48                 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
49                 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
50                 helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
51                 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
52                 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "SplitAbundCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61
62 //**********************************************************************************************************************
63 string SplitAbundCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66         
67         if (type == "fasta")        {  pattern = "[filename],[tag],[tag2],fasta-[filename],[tag],[group],[tag2],fasta";            } 
68         else if (type == "list")    {   pattern = "[filename],[tag],[tag2],list-[filename],[group],[tag],[tag2],list";            }
69         else if (type == "name")    {   pattern = "[filename],[tag],names-[filename],[group],[tag],names";           }
70         else if (type == "count")   {   pattern = "[filename],[tag],[tag2],count_table-[filename],[tag],count_table";     }
71         else if (type == "group")   {   pattern = "[filename],[tag],[tag2],groups-[filename],[tag],[group],[tag2],groups";          }
72         else if (type == "accnos")  {   pattern = "[filename],[tag],[tag2],accnos-[filename],[tag],[group],[tag2],accnos";          }
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SplitAbundCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SplitAbundCommand::SplitAbundCommand(){ 
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["list"] = tempOutNames;
89                 outputTypes["name"] = tempOutNames;
90         outputTypes["count"] = tempOutNames;
91                 outputTypes["accnos"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option)  {
102         try {
103                 abort = false; calledHelp = false;   
104                 allLines = 1;
105                         
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string, string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter;
116                         map<string, string>::iterator it;
117                 
118                         //check to make sure all parameters are valid for command
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
121                         }
122                         
123                         //initialize outputTypes
124                         vector<string> tempOutNames;
125                         outputTypes["list"] = tempOutNames;
126                         outputTypes["name"] = tempOutNames;
127                         outputTypes["accnos"] = tempOutNames;
128                         outputTypes["group"] = tempOutNames;
129                         outputTypes["fasta"] = tempOutNames;    
130             outputTypes["count"] = tempOutNames;
131                                                                                                 
132                         //if the user changes the input directory command factory will send this info to us in the output parameter 
133                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
134                         if (inputDir == "not found"){   inputDir = "";          }
135                         else {
136                                 string path;
137                                 it = parameters.find("list");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
143                                 }
144                                 
145                                 it = parameters.find("group");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
151                                 }
152                                 
153                                 it = parameters.find("fasta");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168
169                 it = parameters.find("count");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
175                                 }
176                         }
177
178                         
179                         //if the user changes the output directory command factory will send this info to us in the output parameter 
180                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
181
182                         //check for required parameters
183                         listfile = validParameter.validFile(parameters, "list", true);
184                         if (listfile == "not open") { abort = true; }
185                         else if (listfile == "not found") { listfile = ""; }
186                         else{ inputFile = listfile; m->setListFile(listfile); } 
187                         
188                         namefile = validParameter.validFile(parameters, "name", true);
189                         if (namefile == "not open") { abort = true; }
190                         else if (namefile == "not found") { namefile = ""; }    
191                         else{ inputFile = namefile; m->setNameFile(namefile); } 
192                 
193                         fastafile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastafile == "not open") { abort = true; }
195                         else if (fastafile == "not found") {                            
196                                 fastafile = m->getFastaFile(); 
197                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
198                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
199                         }else { m->setFastaFile(fastafile); }   
200                         
201                         groupfile = validParameter.validFile(parameters, "group", true);
202                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
203                         else if (groupfile == "not found") { groupfile = ""; }
204                         else {  
205                                 int error = groupMap.readMap(groupfile);
206                                 if (error == 1) { abort = true; }
207                                 m->setGroupFile(groupfile);
208                         }
209                         
210             countfile = validParameter.validFile(parameters, "count", true);
211                         if (countfile == "not open") { countfile = ""; abort = true; }
212                         else if (countfile == "not found") { countfile = "";  } 
213                         else {
214                 m->setCountTableFile(countfile); 
215                 ct.readTable(countfile, true);
216             }
217             
218             if ((namefile != "") && (countfile != "")) {
219                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
220             }
221                         
222             if ((groupfile != "") && (countfile != "")) {
223                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
224             }
225             
226                         groups = validParameter.validFile(parameters, "groups", false);         
227                         if (groups == "not found") { groups = ""; }
228                         else { m->splitAtDash(groups, Groups); }
229                         
230                         if (((groupfile == "") && (countfile == ""))&& (groups != "")) {  m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
231                         
232             if (countfile != "") {
233                 if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
234             }
235             
236                         //do you have all files needed
237                         if ((listfile == "") && (namefile == "") && (countfile == "")) { 
238                                 namefile = m->getNameFile(); 
239                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
240                                 else {                          
241                                         listfile = m->getListFile(); 
242                                         if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
243                                         else {  
244                         countfile  = m->getCountTableFile(); 
245                         if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
246                         else {  m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
247                     }
248                                 }
249                         }
250             
251                         //check for optional parameter and set defaults
252                         // ...at some point should added some additional type checking...
253                         label = validParameter.validFile(parameters, "label", false);                   
254                         if (label == "not found") { label = "";  allLines = 1; }
255                         else { 
256                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
257                                 else { allLines = 1;  }
258                         }
259                         
260                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
261                         accnos = m->isTrue(temp); 
262                         
263                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
264                         m->mothurConvert(temp, cutoff); 
265
266                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
267                 }
268
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
272                 exit(1);
273         }
274 }
275 //**********************************************************************************************************************
276 SplitAbundCommand::~SplitAbundCommand(){}
277 //**********************************************************************************************************************
278 int SplitAbundCommand::execute(){
279         try {
280         
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282         
283         if (Groups.size() != 0) {
284             vector<string> allGroups;
285             if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
286             else { allGroups = groupMap.getNamesOfGroups(); }
287             SharedUtil util;
288             util.setGroups(Groups, allGroups);
289         }
290                 
291                 if (listfile != "") { //you are using a listfile to determine abundance
292                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
293                         
294                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295                         set<string> processedLabels;
296                         set<string> userLabels = labels;        
297                         
298                         InputData input(listfile, "list");
299                         ListVector* list = input.getListVector();
300                         string lastLabel = list->getLabel();
301                         
302                         //do you have a namefile or do we need to similate one?
303                         if (namefile != "") {  readNamesFile();         }
304                         else                            { createNameMap(list);  }
305                         
306                         if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0; }
307                         
308                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
309                         
310                                 if (m->control_pressed) {  delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
311                                 
312                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
313                                                 
314                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
315                                                 splitList(list);
316                                                                                         
317                                                 processedLabels.insert(list->getLabel());
318                                                 userLabels.erase(list->getLabel());
319                                 }
320                                 
321                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
322                                                 string saveLabel = list->getLabel();
323                                                 
324                                                 delete list;
325                                                 list = input.getListVector(lastLabel); //get new list vector to process
326                                                 
327                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
328                                                 splitList(list);
329                                                 
330                                                 processedLabels.insert(list->getLabel());
331                                                 userLabels.erase(list->getLabel());
332                                                 
333                                                 //restore real lastlabel to save below
334                                                 list->setLabel(saveLabel);
335                                 }
336                                 
337                         
338                                 lastLabel = list->getLabel();
339                                         
340                                 delete list;
341                                 list = input.getListVector(); //get new list vector to process
342                         }
343                         
344                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
345                         
346                         //output error messages about any remaining user labels
347                         set<string>::iterator it;
348                         bool needToRun = false;
349                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
350                                 m->mothurOut("Your file does not include the label " + *it); 
351                                 if (processedLabels.count(lastLabel) != 1) {
352                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
353                                         needToRun = true;
354                                 }else {
355                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
356                                 }
357
358                         }
359                         
360                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
361                         
362                         //run last label if you need to
363                         if (needToRun == true)  {
364                                 if (list != NULL) {     delete list;    }
365                                 list = input.getListVector(lastLabel); //get new list vector to process
366                                 
367                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
368                                 splitList(list);                
369                                 
370                                 delete list;
371                         }
372                         
373                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }      return 0;       }
374                                                                         
375                 }else if (namefile != "") { //you are using the namefile to determine abundance
376                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
377                         
378                         splitNames(); 
379                         writeNames();
380                         
381                         string tag = "";
382                         if (groupfile != "")                            {  parseGroup(tag);             }
383                         if (accnos)                                                     {  writeAccnos(tag);    }
384                         if (fastafile != "")                            {  parseFasta(tag);             }
385                 }else {
386             //split by countfile
387             string tag = "";
388             splitCount();
389             
390                         if (accnos)                                                     {  writeAccnos(tag);    }
391                         if (fastafile != "")                            {  parseFasta(tag);             }
392         }
393                 
394                 //set fasta file as new current fastafile
395                 string current = "";
396                 itTypes = outputTypes.find("fasta");
397                 if (itTypes != outputTypes.end()) {
398                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
399                 }
400                 
401                 itTypes = outputTypes.find("name");
402                 if (itTypes != outputTypes.end()) {
403                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
404                 }
405                 
406                 itTypes = outputTypes.find("group");
407                 if (itTypes != outputTypes.end()) {
408                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
409                 }
410                 
411                 itTypes = outputTypes.find("list");
412                 if (itTypes != outputTypes.end()) {
413                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
414                 }
415                 
416                 itTypes = outputTypes.find("accnos");
417                 if (itTypes != outputTypes.end()) {
418                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
419                 }
420         
421         itTypes = outputTypes.find("count");
422                 if (itTypes != outputTypes.end()) {
423                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
424                 }
425                 
426                 m->mothurOutEndLine();
427                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
428                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
429                 m->mothurOutEndLine();
430                 
431                 return 0;
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "SplitAbundCommand", "execute");
435                 exit(1);
436         }
437 }
438 /**********************************************************************************************************************/
439 int SplitAbundCommand::splitList(ListVector* thisList) {
440         try {
441                 rareNames.clear();
442                 abundNames.clear();
443                 
444                 //get rareNames and abundNames
445         int numRareBins = 0;
446                 for (int i = 0; i < thisList->getNumBins(); i++) {
447                         if (m->control_pressed) { return 0; }
448                         
449                         string bin = thisList->get(i);
450                                                 
451                         vector<string> names;
452                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
453                         int size = names.size();
454             
455             //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
456             if (countfile != "") {
457                 size = 0;
458                 for (int j = 0; j < names.size(); j++) {  size += ct.getNumSeqs(names[j]); }
459             }
460             
461                         if (size <= cutoff) {
462                 numRareBins++;
463                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
464                         }else{
465                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
466                         }
467                 }//end for
468
469                 
470                 string tag = thisList->getLabel();
471        
472                 writeList(thisList, tag, numRareBins);
473     
474                 if (groupfile != "")                            {  parseGroup(tag);             }
475                 if (accnos)                                                     {  writeAccnos(tag);    }
476                 if (fastafile != "")                            {  parseFasta(tag);             }
477         if (countfile != "")                            {  parseCount(tag);             }
478         
479                 return 0;
480
481         }
482         catch(exception& e) {
483                 m->errorOut(e, "SplitAbundCommand", "splitList");
484                 exit(1);
485         }
486 }
487 /**********************************************************************************************************************/
488 int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) { 
489         try {
490                 
491                 map<string, ofstream*> filehandles;
492                 
493                 if (Groups.size() == 0) {
494                         int numAbundBins = thisList->getNumBins() - numRareBins;
495
496                         ofstream aout;
497                         ofstream rout;
498                         
499             map<string, string> variables; 
500             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
501             variables["[tag]"] = tag;
502             variables["[tag2]"] = "rare";
503                         string rare = getOutputFileName("list",variables);
504                         m->openOutputFile(rare, rout);
505                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
506                         
507             variables["[tag2]"] = "abund";
508                         string abund = getOutputFileName("list",variables);
509                         m->openOutputFile(abund, aout);
510                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
511
512                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
513                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
514
515                         for (int i = 0; i < thisList->getNumBins(); i++) {
516                                 if (m->control_pressed) { break; }
517                         
518                                 string bin = thisList->get(i); 
519                 vector<string> names;
520                 m->splitAtComma(bin, names);
521                 
522                                 int size = names.size();
523                 if (countfile != "") {
524                     size = 0;
525                     for (int j = 0; j < names.size(); j++) {  size += ct.getNumSeqs(names[j]); }
526                 }
527                         
528                                 if (size <= cutoff) {  rout << bin << '\t';  }
529                                 else                            {  aout << bin << '\t'; }
530                         }
531                         
532                         if (rareNames.size() != 0)      { rout << endl; }
533                         if (abundNames.size() != 0) { aout << endl; }
534                         
535                         rout.close();
536                         aout.close();
537                         
538                 }else{ //parse names by abundance and group
539                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
540                         ofstream* temp;
541                         ofstream* temp2;
542                         //map<string, bool> wroteFile;
543                         map<string, ofstream*> filehandles;
544                         map<string, ofstream*>::iterator it3;
545
546                         for (int i=0; i<Groups.size(); i++) {
547                                 temp = new ofstream;
548                                 filehandles[Groups[i]+".rare"] = temp;
549                                 temp2 = new ofstream;
550                                 filehandles[Groups[i]+".abund"] = temp2;
551                                 
552                 map<string, string> variables; 
553                 variables["[filename]"] = fileroot;
554                 variables["[tag]"] = tag;
555                 variables["[tag2]"] = "rare";
556                 variables["[group]"] = Groups[i];
557                 string rareGroupFileName = getOutputFileName("list",variables);
558                 variables["[tag2]"] = "abund";
559                 string abundGroupFileName = getOutputFileName("list",variables);
560                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
561                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
562                                 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
563                                 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
564                         }
565                         
566                         map<string, string> groupVector;
567                         map<string, string>::iterator itGroup;
568                         map<string, int> groupNumBins;
569                 
570                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
571                                 groupNumBins[it3->first] = 0;
572                                 groupVector[it3->first] = "";
573                         }
574                 
575                         for (int i = 0; i < thisList->getNumBins(); i++) {
576                                 if (m->control_pressed) { break; }
577                         
578                                 map<string, string> groupBins;
579                                 string bin = thisList->get(i); 
580                         
581                                 vector<string> names;
582                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
583                         
584                                 //parse bin into list of sequences in each group
585                                 for (int j = 0; j < names.size(); j++) {
586                                         string rareAbund;
587                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
588                                                 rareAbund = ".rare";
589                                         }else{ //you are a abund name
590                                                 rareAbund = ".abund";
591                                         }
592                                         
593                     if (countfile == "") {
594                         string group = groupMap.getGroup(names[j]);
595                         
596                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
597                             itGroup = groupBins.find(group+rareAbund);
598                             if(itGroup == groupBins.end()) {
599                                 groupBins[group+rareAbund] = names[j];  //add first name
600                                 groupNumBins[group+rareAbund]++;
601                             }else{ //add another name
602                                 groupBins[group+rareAbund] +=  "," + names[j];
603                             }
604                         }else if(group == "not found") {
605                             m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
606                         }
607                     }else {
608                         vector<string> thisSeqsGroups = ct.getGroups(names[j]);
609                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
610                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
611                                 itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
612                                 if(itGroup == groupBins.end()) {
613                                     groupBins[thisSeqsGroups[k]+rareAbund] = names[j];  //add first name
614                                     groupNumBins[thisSeqsGroups[k]+rareAbund]++;
615                                 }else{ //add another name
616                                     groupBins[thisSeqsGroups[k]+rareAbund] +=  "," + names[j];
617                                 }
618                             }
619                         }
620                     }
621                                 }
622                         
623                         
624                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
625                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
626                                 }
627                         }
628                         
629                         //end list vector
630                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
631                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
632                                 (*(filehandles[it3->first])).close();
633                                 delete it3->second;
634                         }
635                 }
636                 
637                 return 0;
638
639         }
640         catch(exception& e) {
641                 m->errorOut(e, "SplitAbundCommand", "writeList");
642                 exit(1);
643         }
644 }
645 /**********************************************************************************************************************/
646 int SplitAbundCommand::splitCount() { //countfile
647         try {
648                 rareNames.clear();
649                 abundNames.clear();     
650         
651                 vector<string> allNames = ct.getNamesOfSeqs();
652         for (int i = 0; i < allNames.size(); i++) {
653             
654             if (m->control_pressed) { return 0; }
655             
656             int size = ct.getNumSeqs(allNames[i]);
657             nameMap[allNames[i]] = allNames[i];
658             
659                         if (size <= cutoff) {
660                                 rareNames.insert(allNames[i]); 
661                         }else{
662                                 abundNames.insert(allNames[i]); 
663                         }
664                 }
665         
666         //write out split count files
667         parseCount("");
668                 
669                 return 0;  
670         }
671         catch(exception& e) {
672                 m->errorOut(e, "SplitAbundCommand", "splitCount");
673                 exit(1);
674         }
675 }
676 /**********************************************************************************************************************/
677 int SplitAbundCommand::splitNames() { //namefile
678         try {
679                 
680                 rareNames.clear();
681                 abundNames.clear();     
682                         
683                 //open input file
684                 ifstream in;
685                 m->openInputFile(namefile, in);
686                 
687                 while (!in.eof()) {
688                         if (m->control_pressed) { break; }
689                         
690                         string firstCol, secondCol;
691                         in >> firstCol >> secondCol; m->gobble(in);
692                         
693                         nameMap[firstCol] = secondCol;
694                         
695                         int size = m->getNumNames(secondCol);
696                                 
697                         if (size <= cutoff) {
698                                 rareNames.insert(firstCol); 
699                         }else{
700                                 abundNames.insert(firstCol); 
701                         }
702                 }
703                 in.close();
704                                 
705                 return 0;
706
707         }
708         catch(exception& e) {
709                 m->errorOut(e, "SplitAbundCommand", "splitNames");
710                 exit(1);
711         }
712 }
713 /**********************************************************************************************************************/
714 int SplitAbundCommand::readNamesFile() { 
715         try {
716                 //open input file
717                 ifstream in;
718                 m->openInputFile(namefile, in);
719                 
720                 while (!in.eof()) {
721                         if (m->control_pressed) { break; }
722                         
723                         string firstCol, secondCol;
724                         in >> firstCol >> secondCol; m->gobble(in);
725                         
726                         nameMap[firstCol] = secondCol;
727                 }
728                 in.close();
729                                 
730                 return 0;
731
732         }
733         catch(exception& e) {
734                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
735                 exit(1);
736         }
737 }
738 /**********************************************************************************************************************/
739 int SplitAbundCommand::createNameMap(ListVector* thisList) {
740         try {
741                 
742                 if (thisList != NULL) {
743                         for (int i = 0; i < thisList->getNumBins(); i++) {
744                                 if (m->control_pressed) { return 0; }
745                                 
746                                 string bin = thisList->get(i);
747                                                         
748                                 vector<string> names;
749                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
750                                 
751                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
752                         }//end for
753                 }
754                 
755                 return 0;
756         }
757         catch(exception& e) {
758                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
759                 exit(1);
760         }
761 }
762 /**********************************************************************************************************************/
763 int SplitAbundCommand::parseCount(string tag) { //namefile
764         try {
765                 
766                 map<string, ofstream*> filehandles;
767         
768                 if (Groups.size() == 0) {
769             map<string, string> variables; 
770             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
771             variables["[tag]"] = tag;
772             variables["[tag2]"] = "rare";
773                         string rare = getOutputFileName("count",variables);
774             outputNames.push_back(rare); outputTypes["count"].push_back(rare);
775                         variables["[tag2]"] = "abund";
776                         string abund = getOutputFileName("count",variables);
777                         outputNames.push_back(abund); outputTypes["count"].push_back(abund);
778                         
779             CountTable rareTable;
780             CountTable abundTable;
781             if (ct.hasGroupInfo()) {  
782                 vector<string> ctGroups = ct.getNamesOfGroups();
783                 for (int i = 0; i < ctGroups.size(); i++) {  rareTable.addGroup(ctGroups[i]);  abundTable.addGroup(ctGroups[i]); }
784             }
785             
786                         if (rareNames.size() != 0) {
787                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
788                     if (ct.hasGroupInfo()) {
789                         vector<int> groupCounts = ct.getGroupCounts(*itRare);
790                         rareTable.push_back(*itRare, groupCounts);
791                     }else {
792                         int groupCounts = ct.getNumSeqs(*itRare);
793                         rareTable.push_back(*itRare, groupCounts);
794                     }
795                                 }
796                 if (rareTable.hasGroupInfo()) {
797                     vector<string> ctGroups = rareTable.getNamesOfGroups();
798                     for (int i = 0; i < ctGroups.size(); i++) { 
799                         if (rareTable.getGroupCount(ctGroups[i]) == 0) { rareTable.removeGroup(ctGroups[i]); }
800                     }
801                 }
802                 rareTable.printTable(rare);
803                         }
804                         
805             
806                         if (abundNames.size() != 0) {
807                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
808                                         if (ct.hasGroupInfo()) {
809                         vector<int> groupCounts = ct.getGroupCounts(*itAbund);
810                         abundTable.push_back(*itAbund, groupCounts);
811                     }else {
812                         int groupCounts = ct.getNumSeqs(*itAbund);
813                         abundTable.push_back(*itAbund, groupCounts);
814                     }
815                                 }
816                 if (abundTable.hasGroupInfo()) {
817                     vector<string> ctGroups = abundTable.getNamesOfGroups();
818                     for (int i = 0; i < ctGroups.size(); i++) { 
819                         if (abundTable.getGroupCount(ctGroups[i]) == 0) { abundTable.removeGroup(ctGroups[i]); }
820                     }
821                 }
822                 abundTable.printTable(abund);
823                         }
824                         
825                 }else{ //parse names by abundance and group
826                         map<string, CountTable*> countTableMap;
827                         map<string, CountTable*>::iterator it3;
828             
829                         for (int i=0; i<Groups.size(); i++) {
830                                 CountTable* rareCt = new CountTable();
831                 rareCt->addGroup(Groups[i]);
832                                 countTableMap[Groups[i]+".rare"] = rareCt;
833                                 CountTable* abundCt = new CountTable();
834                 abundCt->addGroup(Groups[i]);
835                                 countTableMap[Groups[i]+".abund"] = abundCt;
836                         }
837                         
838             vector<string> allNames = ct.getNamesOfSeqs();
839                         for (int i = 0; i < allNames.size(); i++) {                             
840                                 string rareAbund;
841                                 if (rareNames.count(allNames[i]) != 0) { //you are a rare name
842                     rareAbund = ".rare";
843                                 }else{ //you are a abund name
844                     rareAbund = ".abund";
845                                 }
846                                 
847                 vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
848                 for (int j = 0; j < thisSeqsGroups.size(); j++) {
849                     if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
850                         int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
851                         vector<int> nums; nums.push_back(num);
852                         countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums); 
853                     }
854                 }
855                         }
856                         
857                         
858                         for (it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) { 
859                 string fileroot =  outputDir + m->getRootName(m->getSimpleName(countfile));
860                 map<string, string> variables; 
861                 variables["[filename]"] = fileroot;
862                 variables["[tag]"] = it3->first;
863                 string filename = getOutputFileName("count",variables);
864                 outputNames.push_back(filename);  outputTypes["count"].push_back(filename);
865                 (it3->second)->printTable(filename);
866                                 delete it3->second;
867                         }
868                 }
869         
870                 return 0;
871         
872         }
873         catch(exception& e) {
874                 m->errorOut(e, "SplitAbundCommand", "parseCount");
875                 exit(1);
876         }
877 }
878 /**********************************************************************************************************************/
879 int SplitAbundCommand::writeNames() { //namefile
880         try {
881                 
882                 map<string, ofstream*> filehandles;
883
884                 if (Groups.size() == 0) {
885                         ofstream aout;
886                         ofstream rout;
887                         
888             map<string, string> variables;
889             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
890             variables["[tag]"] = "rare";
891                         string rare = getOutputFileName("name", variables);
892                         m->openOutputFile(rare, rout);
893                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
894                         
895             variables["[tag]"] = "abund";
896                         string abund = getOutputFileName("name", variables);
897                         m->openOutputFile(abund, aout);
898                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
899                         
900                         if (rareNames.size() != 0) {
901                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
902                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
903                                 }
904                         }
905                         rout.close();
906                         
907                         if (abundNames.size() != 0) {
908                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
909                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
910                                 }
911                         }
912                         aout.close();
913                         
914                 }else{ //parse names by abundance and group
915                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
916                         ofstream* temp;
917                         ofstream* temp2;
918                         map<string, ofstream*> filehandles;
919                         map<string, ofstream*>::iterator it3;
920
921                         for (int i=0; i<Groups.size(); i++) {
922                                 temp = new ofstream;
923                                 filehandles[Groups[i]+".rare"] = temp;
924                                 temp2 = new ofstream;
925                                 filehandles[Groups[i]+".abund"] = temp2;
926                                 
927                 map<string, string> variables;
928                 variables["[filename]"] = fileroot;
929                 variables["[tag]"] = "rare";
930                 variables["[group]"] = Groups[i];
931                 string rareGroupFileName = getOutputFileName("name",variables);
932                 variables["[tag]"] = "abund";
933                 string abundGroupFileName = getOutputFileName("name",variables);
934                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
935                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
936                         }
937                         
938                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
939                                 vector<string> names;
940                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
941                                 
942                                 string rareAbund;
943                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
944                                                 rareAbund = ".rare";
945                                 }else{ //you are a abund name
946                                                 rareAbund = ".abund";
947                                 }
948                                 
949                                 map<string, string> outputStrings;
950                                 map<string, string>::iterator itout;
951                                 for (int i = 0; i < names.size(); i++) {
952                                         
953                                         string group = groupMap.getGroup(names[i]);
954                                         
955                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
956                                                 itout = outputStrings.find(group+rareAbund);
957                                                 if (itout == outputStrings.end()) {  
958                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
959                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
960                                         }else if(group == "not found") {
961                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
962                                         }
963                                 }
964                                 
965                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
966                         }
967                         
968                         
969                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
970                                 (*(filehandles[it3->first])).close();
971                 map<string, string> variables;
972                 variables["[filename]"] = fileroot;
973                 variables["[tag]"] = it3->first;
974                                 outputNames.push_back(getOutputFileName("name",variables));  outputTypes["name"].push_back(getOutputFileName("name",variables));
975                                 delete it3->second;
976                         }
977                 }
978                                 
979                 return 0;
980
981         }
982         catch(exception& e) {
983                 m->errorOut(e, "SplitAbundCommand", "writeNames");
984                 exit(1);
985         }
986 }
987 /**********************************************************************************************************************/
988 //just write the unique names - if a namesfile is given
989 int SplitAbundCommand::writeAccnos(string tag) { 
990         try {
991                 
992                 map<string, ofstream*> filehandles;
993                 
994                 if (Groups.size() == 0) {
995                         ofstream aout;
996                         ofstream rout;
997                         
998             map<string, string> variables;
999             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
1000             variables["[tag]"] = tag;
1001             variables["[tag2]"] = "rare";
1002                         string rare = getOutputFileName("accnos",variables);
1003                         m->openOutputFile(rare, rout);
1004                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
1005                         
1006                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1007                                 rout << (*itRare) << endl;
1008                         }
1009                         rout.close();
1010                 
1011             variables["[tag2]"] = "abund";
1012                         string abund = getOutputFileName("accnos",variables);
1013                         m->openOutputFile(abund, aout);
1014                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
1015                         
1016                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1017                                 aout << (*itAbund) << endl;
1018                         }
1019                         aout.close();
1020                         
1021                 }else{ //parse names by abundance and group
1022                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
1023                         ofstream* temp;
1024                         ofstream* temp2;
1025                         map<string, ofstream*> filehandles;
1026                         map<string, ofstream*>::iterator it3;
1027                         
1028                         for (int i=0; i<Groups.size(); i++) {
1029                                 temp = new ofstream;
1030                                 filehandles[Groups[i]+".rare"] = temp;
1031                                 temp2 = new ofstream;
1032                                 filehandles[Groups[i]+".abund"] = temp2;
1033                                 
1034                 map<string, string> variables;
1035                 variables["[filename]"] = fileroot;
1036                 variables["[tag]"] = tag;
1037                 variables["[tag2]"] = "rare";
1038                 variables["[group]"] = Groups[i];
1039                                 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".rare"]));
1040                 variables["[tag2]"] = "abund";
1041                                 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".abund"]));
1042                         }
1043                         
1044                         //write rare
1045                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1046                                         string group = groupMap.getGroup(*itRare);
1047                                         
1048                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1049                                                 *(filehandles[group+".rare"]) << *itRare << endl;
1050                                         }
1051                         }
1052                                 
1053                         //write abund   
1054                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1055                                         string group = groupMap.getGroup(*itAbund);
1056                                         
1057                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1058                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
1059                                         }
1060                         }
1061                         
1062                         //close files
1063                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1064                                 (*(filehandles[it3->first])).close();
1065                 map<string, string> variables;
1066                 variables["[filename]"] = fileroot;
1067                 variables["[tag]"] = tag;
1068                 variables["[tag2]"] = it3->first;
1069                                 outputNames.push_back(getOutputFileName("accnos",variables));  outputTypes["accnos"].push_back(getOutputFileName("accnos",variables));
1070                                 delete it3->second;
1071                         }
1072                 }
1073                                 
1074                 return 0;
1075
1076         }
1077         catch(exception& e) {
1078                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
1079                 exit(1);
1080         }
1081 }
1082 /**********************************************************************************************************************/
1083 int SplitAbundCommand::parseGroup(string tag) { //namefile
1084         try {
1085                 
1086                 map<string, ofstream*> filehandles;
1087         
1088                 if (Groups.size() == 0) {
1089                         ofstream aout;
1090                         ofstream rout;
1091                         
1092             map<string, string> variables;
1093             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
1094             variables["[tag]"] = tag;
1095             variables["[tag2]"] = "rare";
1096                         string rare = getOutputFileName("group",variables);
1097                         m->openOutputFile(rare, rout);
1098                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
1099                 
1100             variables["[tag2]"] = "abund";
1101                         string abund = getOutputFileName("group",variables);
1102 ;
1103                         m->openOutputFile(abund, aout);
1104                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
1105                         
1106                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
1107                                 vector<string> names;
1108                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
1109                                 
1110                                 for (int i = 0; i < names.size(); i++) {
1111                                 
1112                                         string group = groupMap.getGroup(names[i]);
1113                                 
1114                                         if (group == "not found") { 
1115                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
1116                                         }else {
1117                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
1118                                                         rout << names[i] << '\t' << group << endl;
1119                                                 }else{ //you are a abund name
1120                                                         aout << names[i] << '\t' << group << endl;
1121                                                 }
1122                                         }
1123                                 }
1124                         }
1125                         
1126                         rout.close(); 
1127                         aout.close(); 
1128
1129                 }else{ //parse names by abundance and group
1130                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
1131                         ofstream* temp;
1132                         ofstream* temp2;
1133                         map<string, ofstream*> filehandles;
1134                         map<string, ofstream*>::iterator it3;
1135
1136                         for (int i=0; i<Groups.size(); i++) {
1137                                 temp = new ofstream;
1138                                 filehandles[Groups[i]+".rare"] = temp;
1139                                 temp2 = new ofstream;
1140                                 filehandles[Groups[i]+".abund"] = temp2;
1141                                 
1142                 map<string, string> variables;
1143                 variables["[filename]"] = fileroot;
1144                 variables["[tag]"] = tag;
1145                 variables["[tag2]"] = "rare";
1146                 variables["[group]"] = Groups[i];
1147                                 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".rare"]));
1148                 variables["[tag2]"] = "abund";
1149                                 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".abund"]));
1150                         }
1151                         
1152                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
1153                                 vector<string> names;
1154                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
1155                                 
1156                                 string rareAbund;
1157                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
1158                                         rareAbund = ".rare";
1159                                 }else{ //you are a abund name
1160                                         rareAbund = ".abund";
1161                                 }
1162                                 
1163                                 for (int i = 0; i < names.size(); i++) {
1164                                 
1165                                         string group = groupMap.getGroup(names[i]);
1166                                                                         
1167                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1168                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
1169                                         }
1170                                 }
1171                         }
1172                         
1173                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1174                                 (*(filehandles[it3->first])).close();
1175                 map<string, string> variables;
1176                 variables["[filename]"] = fileroot;
1177                 variables["[tag]"] = tag;
1178                 variables["[tag2]"] = it3->first;
1179                                 outputNames.push_back(getOutputFileName("group",variables));  outputTypes["group"].push_back(getOutputFileName("group",variables));
1180                                 delete it3->second;
1181                         }
1182                 }
1183                                 
1184                 return 0;
1185
1186         }
1187         catch(exception& e) {
1188                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
1189                 exit(1);
1190         }
1191 }
1192 /**********************************************************************************************************************/
1193 int SplitAbundCommand::parseFasta(string tag) { //namefile
1194         try {
1195                 
1196                 map<string, ofstream*> filehandles;
1197                 
1198                 if (Groups.size() == 0) {
1199                         ofstream aout;
1200                         ofstream rout;
1201                         
1202             map<string, string> variables;
1203             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
1204             variables["[tag]"] = tag;
1205             variables["[tag2]"] = "rare";
1206                         string rare = getOutputFileName("fasta",variables);
1207                         m->openOutputFile(rare, rout);
1208                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
1209                 
1210             variables["[tag2]"] = "abund";
1211                         string abund = getOutputFileName("fasta",variables);
1212                         m->openOutputFile(abund, aout);
1213                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
1214                 
1215                         //open input file
1216                         ifstream in;
1217                         m->openInputFile(fastafile, in);
1218         
1219                         while (!in.eof()) {
1220                                 if (m->control_pressed) { break; }
1221                 
1222                                 Sequence seq(in); m->gobble(in);
1223                                 
1224                                 if (seq.getName() != "") { 
1225                                         
1226                                         map<string, string>::iterator itNames;
1227                                         
1228                                         itNames = nameMap.find(seq.getName());
1229                                         
1230                                         if (itNames == nameMap.end()) {
1231                                                 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1232                                         }else{
1233                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
1234                                                         seq.printSequence(rout);
1235                                                 }else{ //you are a abund name
1236                                                         seq.printSequence(aout);
1237                                                 }
1238                                         }
1239                                 }
1240                         }
1241                         in.close();
1242                         rout.close(); 
1243                         aout.close(); 
1244
1245                 }else{ //parse names by abundance and group
1246                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
1247                         ofstream* temp;
1248                         ofstream* temp2;
1249                         map<string, ofstream*> filehandles;
1250                         map<string, ofstream*>::iterator it3;
1251
1252                         for (int i=0; i<Groups.size(); i++) {
1253                                 temp = new ofstream;
1254                                 filehandles[Groups[i]+".rare"] = temp;
1255                                 temp2 = new ofstream;
1256                                 filehandles[Groups[i]+".abund"] = temp2;
1257                                 
1258                 map<string, string> variables;
1259                 variables["[filename]"] = fileroot;
1260                 variables["[tag]"] = tag;
1261                 variables["[tag2]"] = "rare";
1262                 variables["[group]"] = Groups[i];
1263                                 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".rare"]));
1264                 variables["[tag2]"] = "abund";
1265                                 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".abund"]));
1266                         }
1267                         
1268                         //open input file
1269                         ifstream in;
1270                         m->openInputFile(fastafile, in);
1271         
1272                         while (!in.eof()) {
1273                                 if (m->control_pressed) { break; }
1274                 
1275                                 Sequence seq(in); m->gobble(in);
1276                                 
1277                                 if (seq.getName() != "") { 
1278                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
1279                                         
1280                                         if (itNames == nameMap.end()) {
1281                                                 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1282                                         }else{
1283                                                 vector<string> names;
1284                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
1285                                 
1286                                                 string rareAbund;
1287                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1288                                                         rareAbund = ".rare";
1289                                                 }else{ //you are a abund name
1290                                                         rareAbund = ".abund";
1291                                                 }
1292                         
1293                         if (countfile == "") {
1294                             for (int i = 0; i < names.size(); i++) {
1295                                 string group = groupMap.getGroup(seq.getName());
1296                                 
1297                                 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1298                                     seq.printSequence(*(filehandles[group+rareAbund]));
1299                                 }else if(group == "not found") {
1300                                     m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1301                                 }
1302                             }
1303                         }else {
1304                             vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
1305                             for (int i = 0; i < thisSeqsGroups.size(); i++) {
1306                                 if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
1307                                     seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
1308                                 }
1309                             }
1310                         }
1311                                         }
1312                                 }
1313                         }
1314                         in.close();
1315                         
1316                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1317                                 (*(filehandles[it3->first])).close();
1318                 map<string, string> variables;
1319                 variables["[filename]"] = fileroot;
1320                 variables["[tag]"] = tag;
1321                 variables["[tag2]"] = it3->first;
1322                                 outputNames.push_back(getOutputFileName("fasta",variables));  outputTypes["fasta"].push_back(getOutputFileName("fasta",variables));
1323                                 delete it3->second;
1324                         }
1325                 }
1326                                 
1327                 return 0;
1328
1329         }
1330         catch(exception& e) {
1331                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1332                 exit(1);
1333         }
1334 }
1335 /**********************************************************************************************************************/
1336