]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
working on pam
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> SplitAbundCommand::setParameters(){      
15         try {           
16                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none","fasta",false,true,true); parameters.push_back(pfasta);
17         CommandParameter pname("name", "InputTypes", "", "", "NameCount", "FNGLT", "none","name",false,false,true); parameters.push_back(pname);
18         CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "none", "none","count",false,false); parameters.push_back(pcount);
19                 CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","group",false,false); parameters.push_back(pgroup);
20                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none","list",false,false,true); parameters.push_back(plist);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "","",false,true); parameters.push_back(pcutoff);
23                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
24                 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(paccnos);
25                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
26                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
27                 
28                 vector<string> myArray;
29                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
30                 return myArray;
31         }
32         catch(exception& e) {
33                 m->errorOut(e, "SplitAbundCommand", "setParameters");
34                 exit(1);
35         }
36 }
37 //**********************************************************************************************************************
38 string SplitAbundCommand::getHelpString(){      
39         try {
40                 string helpString = "";
41                 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
42                 helpString += "The split.abund command parameters are fasta, list, name, count, cutoff, group, label, groups, cutoff and accnos.\n";
43                 helpString += "The fasta and a list or name or count parameter are required, and you must provide a cutoff value.\n";
44                 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
45                 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
46                 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
47                 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
48                 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
49                 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
50                 helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
51                 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
52                 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
53                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
54                 return helpString;
55         }
56         catch(exception& e) {
57                 m->errorOut(e, "SplitAbundCommand", "getHelpString");
58                 exit(1);
59         }
60 }
61
62 //**********************************************************************************************************************
63 string SplitAbundCommand::getOutputPattern(string type) {
64     try {
65         string pattern = "";
66         
67         if (type == "fasta")        {  pattern = "[filename],[tag],[tag2],fasta-[filename],[tag],[group],[tag2],fasta";            } 
68         else if (type == "list")    {   pattern = "[filename],[tag],[tag2],list-[filename],[group],[tag],[tag2],list";            }
69         else if (type == "name")    {   pattern = "[filename],[tag],names-[filename],[group],[tag],names";           }
70         else if (type == "count")   {   pattern = "[filename],[tag],[tag2],count_table-[filename],[tag],count_table";     }
71         else if (type == "group")   {   pattern = "[filename],[tag],[tag2],groups-[filename],[tag],[group],[tag2],groups";          }
72         else if (type == "accnos")  {   pattern = "[filename],[tag],[tag2],accnos-[filename],[tag],[group],[tag2],accnos";          }
73         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
74         
75         return pattern;
76     }
77     catch(exception& e) {
78         m->errorOut(e, "SplitAbundCommand", "getOutputPattern");
79         exit(1);
80     }
81 }
82 //**********************************************************************************************************************
83 SplitAbundCommand::SplitAbundCommand(){ 
84         try {
85                 abort = true; calledHelp = true; 
86                 setParameters();
87                 vector<string> tempOutNames;
88                 outputTypes["list"] = tempOutNames;
89                 outputTypes["name"] = tempOutNames;
90         outputTypes["count"] = tempOutNames;
91                 outputTypes["accnos"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option)  {
102         try {
103                 abort = false; calledHelp = false;   
104                 allLines = 1;
105                         
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string, string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter;
116                         map<string, string>::iterator it;
117                 
118                         //check to make sure all parameters are valid for command
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
121                         }
122                         
123                         //initialize outputTypes
124                         vector<string> tempOutNames;
125                         outputTypes["list"] = tempOutNames;
126                         outputTypes["name"] = tempOutNames;
127                         outputTypes["accnos"] = tempOutNames;
128                         outputTypes["group"] = tempOutNames;
129                         outputTypes["fasta"] = tempOutNames;    
130             outputTypes["count"] = tempOutNames;
131                                                                                                 
132                         //if the user changes the input directory command factory will send this info to us in the output parameter 
133                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
134                         if (inputDir == "not found"){   inputDir = "";          }
135                         else {
136                                 string path;
137                                 it = parameters.find("list");
138                                 //user has given a template file
139                                 if(it != parameters.end()){ 
140                                         path = m->hasPath(it->second);
141                                         //if the user has not given a path then, add inputdir. else leave path alone.
142                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
143                                 }
144                                 
145                                 it = parameters.find("group");
146                                 //user has given a template file
147                                 if(it != parameters.end()){ 
148                                         path = m->hasPath(it->second);
149                                         //if the user has not given a path then, add inputdir. else leave path alone.
150                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
151                                 }
152                                 
153                                 it = parameters.find("fasta");
154                                 //user has given a template file
155                                 if(it != parameters.end()){ 
156                                         path = m->hasPath(it->second);
157                                         //if the user has not given a path then, add inputdir. else leave path alone.
158                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
159                                 }
160                                 
161                                 it = parameters.find("name");
162                                 //user has given a template file
163                                 if(it != parameters.end()){ 
164                                         path = m->hasPath(it->second);
165                                         //if the user has not given a path then, add inputdir. else leave path alone.
166                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
167                                 }
168
169                 it = parameters.find("count");
170                                 //user has given a template file
171                                 if(it != parameters.end()){ 
172                                         path = m->hasPath(it->second);
173                                         //if the user has not given a path then, add inputdir. else leave path alone.
174                                         if (path == "") {       parameters["count"] = inputDir + it->second;            }
175                                 }
176                         }
177
178                         
179                         //if the user changes the output directory command factory will send this info to us in the output parameter 
180                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
181
182                         //check for required parameters
183                         listfile = validParameter.validFile(parameters, "list", true);
184                         if (listfile == "not open") { abort = true; }
185                         else if (listfile == "not found") { listfile = ""; }
186                         else{ inputFile = listfile; m->setListFile(listfile); } 
187                         
188                         namefile = validParameter.validFile(parameters, "name", true);
189                         if (namefile == "not open") { abort = true; }
190                         else if (namefile == "not found") { namefile = ""; }    
191                         else{ inputFile = namefile; m->setNameFile(namefile); } 
192                 
193                         fastafile = validParameter.validFile(parameters, "fasta", true);
194                         if (fastafile == "not open") { abort = true; }
195                         else if (fastafile == "not found") {                            
196                                 fastafile = m->getFastaFile(); 
197                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
198                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
199                         }else { m->setFastaFile(fastafile); }   
200                         
201                         groupfile = validParameter.validFile(parameters, "group", true);
202                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
203                         else if (groupfile == "not found") { groupfile = ""; }
204                         else {  
205                                 int error = groupMap.readMap(groupfile);
206                                 if (error == 1) { abort = true; }
207                                 m->setGroupFile(groupfile);
208                         }
209                         
210             countfile = validParameter.validFile(parameters, "count", true);
211                         if (countfile == "not open") { countfile = ""; abort = true; }
212                         else if (countfile == "not found") { countfile = "";  } 
213                         else {
214                 m->setCountTableFile(countfile); 
215                 ct.readTable(countfile, true, false);
216             }
217             
218             if ((namefile != "") && (countfile != "")) {
219                 m->mothurOut("[ERROR]: you may only use one of the following: name or count."); m->mothurOutEndLine(); abort = true;
220             }
221                         
222             if ((groupfile != "") && (countfile != "")) {
223                 m->mothurOut("[ERROR]: you may only use one of the following: group or count."); m->mothurOutEndLine(); abort=true;
224             }
225             
226                         groups = validParameter.validFile(parameters, "groups", false);         
227                         if (groups == "not found") { groups = ""; }
228                         else { m->splitAtDash(groups, Groups); }
229                         
230                         if (((groupfile == "") && (countfile == ""))&& (groups != "")) {  m->mothurOut("You cannot select groups without a valid group or count file, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
231                         
232             if (countfile != "") {
233                 if (!ct.hasGroupInfo()) { m->mothurOut("You cannot pick groups without group info in your count file; I will disregard your groups selection."); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
234             }
235             
236                         //do you have all files needed
237                         if ((listfile == "") && (namefile == "") && (countfile == "")) { 
238                                 namefile = m->getNameFile(); 
239                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
240                                 else {                          
241                                         listfile = m->getListFile(); 
242                                         if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
243                                         else {  
244                         countfile  = m->getCountTableFile(); 
245                         if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
246                         else {  m->mothurOut("You have no current list, count or namefile and one is required."); m->mothurOutEndLine(); abort = true; }
247                     }
248                                 }
249                         }
250             
251                         //check for optional parameter and set defaults
252                         // ...at some point should added some additional type checking...
253                         label = validParameter.validFile(parameters, "label", false);                   
254                         if (label == "not found") { label = "";  allLines = 1; }
255                         else { 
256                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
257                                 else { allLines = 1;  }
258                         }
259                         
260                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
261                         accnos = m->isTrue(temp); 
262                         
263                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
264                         m->mothurConvert(temp, cutoff); 
265
266                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
267                 }
268
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
272                 exit(1);
273         }
274 }
275 //**********************************************************************************************************************
276 SplitAbundCommand::~SplitAbundCommand(){}
277 //**********************************************************************************************************************
278 int SplitAbundCommand::execute(){
279         try {
280         
281                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
282         
283         if (Groups.size() != 0) {
284             vector<string> allGroups;
285             if (countfile != "") { allGroups = ct.getNamesOfGroups(); }
286             else { allGroups = groupMap.getNamesOfGroups(); }
287             SharedUtil util;
288             util.setGroups(Groups, allGroups);
289         }
290                 
291                 if (listfile != "") { //you are using a listfile to determine abundance
292                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
293                         
294                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295                         set<string> processedLabels;
296                         set<string> userLabels = labels;        
297                         
298                         InputData input(listfile, "list");
299                         ListVector* list = input.getListVector();
300                         string lastLabel = list->getLabel();
301                         
302                         //do you have a namefile or do we need to similate one?
303                         if (namefile != "") {  readNamesFile();         }
304                         else                            { createNameMap(list);  }
305                         
306                         if (m->control_pressed) { delete list; for (int i = 0; i < outputNames.size(); i++) {   m->mothurRemove(outputNames[i]); } return 0; }
307                         
308                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
309                         
310                                 if (m->control_pressed) {  delete list; for (int i = 0; i < outputNames.size(); i++) {  m->mothurRemove(outputNames[i]); } return 0; }
311                                 
312                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
313                                                 
314                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
315                                                 splitList(list);
316                                                                                         
317                                                 processedLabels.insert(list->getLabel());
318                                                 userLabels.erase(list->getLabel());
319                                 }
320                                 
321                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
322                                                 string saveLabel = list->getLabel();
323                                                 
324                                                 delete list;
325                                                 list = input.getListVector(lastLabel); //get new list vector to process
326                                                 
327                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
328                                                 splitList(list);
329                                                 
330                                                 processedLabels.insert(list->getLabel());
331                                                 userLabels.erase(list->getLabel());
332                                                 
333                                                 //restore real lastlabel to save below
334                                                 list->setLabel(saveLabel);
335                                 }
336                                 
337                         
338                                 lastLabel = list->getLabel();
339                                         
340                                 delete list;
341                                 list = input.getListVector(); //get new list vector to process
342                         }
343                         
344                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
345                         
346                         //output error messages about any remaining user labels
347                         set<string>::iterator it;
348                         bool needToRun = false;
349                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
350                                 m->mothurOut("Your file does not include the label " + *it); 
351                                 if (processedLabels.count(lastLabel) != 1) {
352                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
353                                         needToRun = true;
354                                 }else {
355                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
356                                 }
357
358                         }
359                         
360                         if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       m->mothurRemove(outputNames[i]); } return 0; }
361                         
362                         //run last label if you need to
363                         if (needToRun == true)  {
364                                 if (list != NULL) {     delete list;    }
365                                 list = input.getListVector(lastLabel); //get new list vector to process
366                                 
367                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
368                                 splitList(list);                
369                                 
370                                 delete list;
371                         }
372                         
373                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }      return 0;       }
374                                                                         
375                 }else if (namefile != "") { //you are using the namefile to determine abundance
376                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
377                         
378                         splitNames(); 
379                         writeNames();
380                         
381                         string tag = "";
382                         if (groupfile != "")                            {  parseGroup(tag);             }
383                         if (accnos)                                                     {  writeAccnos(tag);    }
384                         if (fastafile != "")                            {  parseFasta(tag);             }
385                 }else {
386             //split by countfile
387             string tag = "";
388             splitCount();
389             
390                         if (accnos)                                                     {  writeAccnos(tag);    }
391                         if (fastafile != "")                            {  parseFasta(tag);             }
392         }
393                 
394                 //set fasta file as new current fastafile
395                 string current = "";
396                 itTypes = outputTypes.find("fasta");
397                 if (itTypes != outputTypes.end()) {
398                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
399                 }
400                 
401                 itTypes = outputTypes.find("name");
402                 if (itTypes != outputTypes.end()) {
403                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
404                 }
405                 
406                 itTypes = outputTypes.find("group");
407                 if (itTypes != outputTypes.end()) {
408                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
409                 }
410                 
411                 itTypes = outputTypes.find("list");
412                 if (itTypes != outputTypes.end()) {
413                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
414                 }
415                 
416                 itTypes = outputTypes.find("accnos");
417                 if (itTypes != outputTypes.end()) {
418                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
419                 }
420         
421         itTypes = outputTypes.find("count");
422                 if (itTypes != outputTypes.end()) {
423                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setCountTableFile(current); }
424                 }
425                 
426                 m->mothurOutEndLine();
427                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
428                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
429                 m->mothurOutEndLine();
430                 
431                 return 0;
432         }
433         catch(exception& e) {
434                 m->errorOut(e, "SplitAbundCommand", "execute");
435                 exit(1);
436         }
437 }
438 /**********************************************************************************************************************/
439 int SplitAbundCommand::splitList(ListVector* thisList) {
440         try {
441                 rareNames.clear();
442                 abundNames.clear();
443                 
444                 //get rareNames and abundNames
445         int numRareBins = 0;
446                 for (int i = 0; i < thisList->getNumBins(); i++) {
447                         if (m->control_pressed) { return 0; }
448                         
449                         string bin = thisList->get(i);
450                                                 
451                         vector<string> names;
452                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
453                         int size = names.size();
454             
455             //if countfile is not blank we assume the list file is unique, otherwise we assume it includes all seqs
456             if (countfile != "") {
457                 size = 0;
458                 for (int j = 0; j < names.size(); j++) {  size += ct.getNumSeqs(names[j]); }
459             }
460             
461                         if (size <= cutoff) {
462                 numRareBins++;
463                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
464                         }else{
465                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
466                         }
467                 }//end for
468
469                 
470                 string tag = thisList->getLabel();
471        
472                 writeList(thisList, tag, numRareBins);
473     
474                 if (groupfile != "")                            {  parseGroup(tag);             }
475                 if (accnos)                                                     {  writeAccnos(tag);    }
476                 if (fastafile != "")                            {  parseFasta(tag);             }
477         if (countfile != "")                            {  parseCount(tag);             }
478         
479                 return 0;
480
481         }
482         catch(exception& e) {
483                 m->errorOut(e, "SplitAbundCommand", "splitList");
484                 exit(1);
485         }
486 }
487 /**********************************************************************************************************************/
488 int SplitAbundCommand::writeList(ListVector* thisList, string tag, int numRareBins) { 
489         try {
490                 
491                 map<string, ofstream*> filehandles;
492                 
493                 if (Groups.size() == 0) {
494                         int numAbundBins = thisList->getNumBins() - numRareBins;
495
496                         ofstream aout;
497                         ofstream rout;
498                         
499             map<string, string> variables; 
500             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile));
501             variables["[tag]"] = tag;
502             variables["[tag2]"] = "rare";
503                         string rare = getOutputFileName("list",variables);
504                         m->openOutputFile(rare+".temp", rout);
505                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
506                         
507             variables["[tag2]"] = "abund";
508                         string abund = getOutputFileName("list",variables);
509                         m->openOutputFile(abund+".temp", aout);
510                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
511
512                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
513                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
514             
515             vector<string> binLabels = thisList->getLabels();
516             string rareHeader = "label\tnumOtus\t"; string abundHeader = "label\tnumOtus\t";
517                         for (int i = 0; i < thisList->getNumBins(); i++) {
518                                 if (m->control_pressed) { break; }
519                         
520                                 string bin = thisList->get(i); 
521                 vector<string> names;
522                 m->splitAtComma(bin, names);
523                 
524                                 int size = names.size();
525                 if (countfile != "") {
526                     size = 0;
527                     for (int j = 0; j < names.size(); j++) {  size += ct.getNumSeqs(names[j]); }
528                 }
529                         
530                                 if (size <= cutoff) {  rout << bin << '\t';  rareHeader += binLabels[i] + '\t'; }
531                                 else                            {  aout << bin << '\t';  abundHeader += binLabels[i] + '\t'; }
532                         }
533                         
534                         if (rareNames.size() != 0)      { rout << endl; }
535                         if (abundNames.size() != 0) { aout << endl; }
536                         
537                         rout.close();
538                         aout.close();
539             
540             //add headers
541             ofstream r;
542             m->openOutputFile(rare, r);
543             r << rareHeader << endl;
544             r.close();
545             m->appendFiles(rare+".temp", rare);
546             m->mothurRemove(rare+".temp");
547             
548             ofstream a;
549             m->openOutputFile(abund, a);
550             a << abundHeader << endl;
551             a.close();
552             m->appendFiles(abund+".temp", abund);
553             m->mothurRemove(abund+".temp");
554                         
555                 }else{ //parse names by abundance and group
556                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
557                         ofstream* temp;
558                         ofstream* temp2;
559                         //map<string, bool> wroteFile;
560                         map<string, ofstream*> filehandles;
561                         map<string, ofstream*>::iterator it3;
562
563                         for (int i=0; i<Groups.size(); i++) {
564                                 temp = new ofstream;
565                                 filehandles[Groups[i]+".rare"] = temp;
566                                 temp2 = new ofstream;
567                                 filehandles[Groups[i]+".abund"] = temp2;
568                                 
569                 map<string, string> variables; 
570                 variables["[filename]"] = fileroot;
571                 variables["[tag]"] = tag;
572                 variables["[tag2]"] = "rare";
573                 variables["[group]"] = Groups[i];
574                 string rareGroupFileName = getOutputFileName("list",variables);
575                 variables["[tag2]"] = "abund";
576                 string abundGroupFileName = getOutputFileName("list",variables);
577                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
578                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
579                                 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
580                                 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
581                         }
582                         
583                         map<string, string> groupVector;
584             map<string, string> groupLabels;
585                         map<string, string>::iterator itGroup;
586                         map<string, int> groupNumBins;
587                 
588                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
589                                 groupNumBins[it3->first] = 0;
590                                 groupVector[it3->first] = "";
591                 groupLabels[it3->first] = "label\tnumOtus\t";
592                         }
593             vector<string> binLabels = thisList->getLabels();
594                         for (int i = 0; i < thisList->getNumBins(); i++) {
595                                 if (m->control_pressed) { break; }
596                         
597                                 map<string, string> groupBins;
598                                 string bin = thisList->get(i); 
599                         
600                                 vector<string> names;
601                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
602                         
603                                 //parse bin into list of sequences in each group
604                                 for (int j = 0; j < names.size(); j++) {
605                                         string rareAbund;
606                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
607                                                 rareAbund = ".rare";
608                                         }else{ //you are a abund name
609                                                 rareAbund = ".abund";
610                                         }
611                                         
612                     if (countfile == "") {
613                         string group = groupMap.getGroup(names[j]);
614                         
615                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
616                             itGroup = groupBins.find(group+rareAbund);
617                             if(itGroup == groupBins.end()) {
618                                 groupBins[group+rareAbund] = names[j];  //add first name
619                                 groupNumBins[group+rareAbund]++;
620                             }else{ //add another name
621                                 groupBins[group+rareAbund] +=  "," + names[j];
622                             }
623                         }else if(group == "not found") {
624                             m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
625                         }
626                     }else {
627                         vector<string> thisSeqsGroups = ct.getGroups(names[j]);
628                         for (int k = 0; k < thisSeqsGroups.size(); k++) {
629                             if (m->inUsersGroups(thisSeqsGroups[k], Groups)) { //only add if this is in a group we want
630                                 itGroup = groupBins.find(thisSeqsGroups[k]+rareAbund);
631                                 if(itGroup == groupBins.end()) {
632                                     groupBins[thisSeqsGroups[k]+rareAbund] = names[j];  //add first name
633                                     groupNumBins[thisSeqsGroups[k]+rareAbund]++;
634                                 }else{ //add another name
635                                     groupBins[thisSeqsGroups[k]+rareAbund] +=  "," + names[j];
636                                 }
637                             }
638                         }
639                     }
640                                 }
641                         
642                         
643                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
644                                         groupVector[itGroup->first] +=  itGroup->second + '\t';
645                     groupLabels[itGroup->first] += binLabels[i] + '\t';
646                                 }
647                         }
648                         
649                         //end list vector
650                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
651                 (*(filehandles[it3->first])) << groupLabels[it3->first] << endl;
652                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
653                                 (*(filehandles[it3->first])).close();
654                                 delete it3->second;
655                         }
656                 }
657                 
658                 return 0;
659
660         }
661         catch(exception& e) {
662                 m->errorOut(e, "SplitAbundCommand", "writeList");
663                 exit(1);
664         }
665 }
666 /**********************************************************************************************************************/
667 int SplitAbundCommand::splitCount() { //countfile
668         try {
669                 rareNames.clear();
670                 abundNames.clear();     
671         
672                 vector<string> allNames = ct.getNamesOfSeqs();
673         for (int i = 0; i < allNames.size(); i++) {
674             
675             if (m->control_pressed) { return 0; }
676             
677             int size = ct.getNumSeqs(allNames[i]);
678             nameMap[allNames[i]] = allNames[i];
679             
680                         if (size <= cutoff) {
681                                 rareNames.insert(allNames[i]); 
682                         }else{
683                                 abundNames.insert(allNames[i]); 
684                         }
685                 }
686         
687         //write out split count files
688         parseCount("");
689                 
690                 return 0;  
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "SplitAbundCommand", "splitCount");
694                 exit(1);
695         }
696 }
697 /**********************************************************************************************************************/
698 int SplitAbundCommand::splitNames() { //namefile
699         try {
700                 
701                 rareNames.clear();
702                 abundNames.clear();     
703                         
704                 //open input file
705                 ifstream in;
706                 m->openInputFile(namefile, in);
707                 
708                 while (!in.eof()) {
709                         if (m->control_pressed) { break; }
710                         
711                         string firstCol, secondCol;
712                         in >> firstCol >> secondCol; m->gobble(in);
713                         
714                         nameMap[firstCol] = secondCol;
715                         
716                         int size = m->getNumNames(secondCol);
717                                 
718                         if (size <= cutoff) {
719                                 rareNames.insert(firstCol); 
720                         }else{
721                                 abundNames.insert(firstCol); 
722                         }
723                 }
724                 in.close();
725                                 
726                 return 0;
727
728         }
729         catch(exception& e) {
730                 m->errorOut(e, "SplitAbundCommand", "splitNames");
731                 exit(1);
732         }
733 }
734 /**********************************************************************************************************************/
735 int SplitAbundCommand::readNamesFile() { 
736         try {
737                 //open input file
738                 ifstream in;
739                 m->openInputFile(namefile, in);
740                 
741                 while (!in.eof()) {
742                         if (m->control_pressed) { break; }
743                         
744                         string firstCol, secondCol;
745                         in >> firstCol >> secondCol; m->gobble(in);
746                         
747                         nameMap[firstCol] = secondCol;
748                 }
749                 in.close();
750                                 
751                 return 0;
752
753         }
754         catch(exception& e) {
755                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
756                 exit(1);
757         }
758 }
759 /**********************************************************************************************************************/
760 int SplitAbundCommand::createNameMap(ListVector* thisList) {
761         try {
762                 
763                 if (thisList != NULL) {
764                         for (int i = 0; i < thisList->getNumBins(); i++) {
765                                 if (m->control_pressed) { return 0; }
766                                 
767                                 string bin = thisList->get(i);
768                                                         
769                                 vector<string> names;
770                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
771                                 
772                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
773                         }//end for
774                 }
775                 
776                 return 0;
777         }
778         catch(exception& e) {
779                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
780                 exit(1);
781         }
782 }
783 /**********************************************************************************************************************/
784 int SplitAbundCommand::parseCount(string tag) { //namefile
785         try {
786                 
787                 map<string, ofstream*> filehandles;
788         
789                 if (Groups.size() == 0) {
790             map<string, string> variables; 
791             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(countfile));
792             variables["[tag]"] = tag;
793             variables["[tag2]"] = "rare";
794                         string rare = getOutputFileName("count",variables);
795             outputNames.push_back(rare); outputTypes["count"].push_back(rare);
796                         variables["[tag2]"] = "abund";
797                         string abund = getOutputFileName("count",variables);
798                         outputNames.push_back(abund); outputTypes["count"].push_back(abund);
799                         
800             CountTable rareTable;
801             CountTable abundTable;
802             if (ct.hasGroupInfo()) {  
803                 vector<string> ctGroups = ct.getNamesOfGroups();
804                 for (int i = 0; i < ctGroups.size(); i++) {  rareTable.addGroup(ctGroups[i]);  abundTable.addGroup(ctGroups[i]); }
805             }
806             
807                         if (rareNames.size() != 0) {
808                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
809                     if (ct.hasGroupInfo()) {
810                         vector<int> groupCounts = ct.getGroupCounts(*itRare);
811                         rareTable.push_back(*itRare, groupCounts);
812                     }else {
813                         int groupCounts = ct.getNumSeqs(*itRare);
814                         rareTable.push_back(*itRare, groupCounts);
815                     }
816                                 }
817                 if (rareTable.hasGroupInfo()) {
818                     vector<string> ctGroups = rareTable.getNamesOfGroups();
819                     for (int i = 0; i < ctGroups.size(); i++) { 
820                         if (rareTable.getGroupCount(ctGroups[i]) == 0) { rareTable.removeGroup(ctGroups[i]); }
821                     }
822                 }
823                 rareTable.printTable(rare);
824                         }
825                         
826             
827                         if (abundNames.size() != 0) {
828                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
829                                         if (ct.hasGroupInfo()) {
830                         vector<int> groupCounts = ct.getGroupCounts(*itAbund);
831                         abundTable.push_back(*itAbund, groupCounts);
832                     }else {
833                         int groupCounts = ct.getNumSeqs(*itAbund);
834                         abundTable.push_back(*itAbund, groupCounts);
835                     }
836                                 }
837                 if (abundTable.hasGroupInfo()) {
838                     vector<string> ctGroups = abundTable.getNamesOfGroups();
839                     for (int i = 0; i < ctGroups.size(); i++) { 
840                         if (abundTable.getGroupCount(ctGroups[i]) == 0) { abundTable.removeGroup(ctGroups[i]); }
841                     }
842                 }
843                 abundTable.printTable(abund);
844                         }
845                         
846                 }else{ //parse names by abundance and group
847                         map<string, CountTable*> countTableMap;
848                         map<string, CountTable*>::iterator it3;
849             
850                         for (int i=0; i<Groups.size(); i++) {
851                                 CountTable* rareCt = new CountTable();
852                 rareCt->addGroup(Groups[i]);
853                                 countTableMap[Groups[i]+".rare"] = rareCt;
854                                 CountTable* abundCt = new CountTable();
855                 abundCt->addGroup(Groups[i]);
856                                 countTableMap[Groups[i]+".abund"] = abundCt;
857                         }
858                         
859             vector<string> allNames = ct.getNamesOfSeqs();
860                         for (int i = 0; i < allNames.size(); i++) {                             
861                                 string rareAbund;
862                                 if (rareNames.count(allNames[i]) != 0) { //you are a rare name
863                     rareAbund = ".rare";
864                                 }else{ //you are a abund name
865                     rareAbund = ".abund";
866                                 }
867                                 
868                 vector<string> thisSeqsGroups = ct.getGroups(allNames[i]);
869                 for (int j = 0; j < thisSeqsGroups.size(); j++) {
870                     if (m->inUsersGroups(thisSeqsGroups[j], Groups)) { //only add if this is in a group we want
871                         int num = ct.getGroupCount(allNames[i], thisSeqsGroups[j]);
872                         vector<int> nums; nums.push_back(num);
873                         countTableMap[thisSeqsGroups[j]+rareAbund]->push_back(allNames[i], nums); 
874                     }
875                 }
876                         }
877                         
878                         
879                         for (it3 = countTableMap.begin(); it3 != countTableMap.end(); it3++) { 
880                 string fileroot =  outputDir + m->getRootName(m->getSimpleName(countfile));
881                 map<string, string> variables; 
882                 variables["[filename]"] = fileroot;
883                 variables["[tag]"] = it3->first;
884                 string filename = getOutputFileName("count",variables);
885                 outputNames.push_back(filename);  outputTypes["count"].push_back(filename);
886                 (it3->second)->printTable(filename);
887                                 delete it3->second;
888                         }
889                 }
890         
891                 return 0;
892         
893         }
894         catch(exception& e) {
895                 m->errorOut(e, "SplitAbundCommand", "parseCount");
896                 exit(1);
897         }
898 }
899 /**********************************************************************************************************************/
900 int SplitAbundCommand::writeNames() { //namefile
901         try {
902                 
903                 map<string, ofstream*> filehandles;
904
905                 if (Groups.size() == 0) {
906                         ofstream aout;
907                         ofstream rout;
908                         
909             map<string, string> variables;
910             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(namefile));
911             variables["[tag]"] = "rare";
912                         string rare = getOutputFileName("name", variables);
913                         m->openOutputFile(rare, rout);
914                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
915                         
916             variables["[tag]"] = "abund";
917                         string abund = getOutputFileName("name", variables);
918                         m->openOutputFile(abund, aout);
919                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
920                         
921                         if (rareNames.size() != 0) {
922                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
923                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
924                                 }
925                         }
926                         rout.close();
927                         
928                         if (abundNames.size() != 0) {
929                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
930                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
931                                 }
932                         }
933                         aout.close();
934                         
935                 }else{ //parse names by abundance and group
936                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
937                         ofstream* temp;
938                         ofstream* temp2;
939                         map<string, ofstream*> filehandles;
940                         map<string, ofstream*>::iterator it3;
941
942                         for (int i=0; i<Groups.size(); i++) {
943                                 temp = new ofstream;
944                                 filehandles[Groups[i]+".rare"] = temp;
945                                 temp2 = new ofstream;
946                                 filehandles[Groups[i]+".abund"] = temp2;
947                                 
948                 map<string, string> variables;
949                 variables["[filename]"] = fileroot;
950                 variables["[tag]"] = "rare";
951                 variables["[group]"] = Groups[i];
952                 string rareGroupFileName = getOutputFileName("name",variables);
953                 variables["[tag]"] = "abund";
954                 string abundGroupFileName = getOutputFileName("name",variables);
955                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
956                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
957                         }
958                         
959                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
960                                 vector<string> names;
961                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
962                                 
963                                 string rareAbund;
964                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
965                                                 rareAbund = ".rare";
966                                 }else{ //you are a abund name
967                                                 rareAbund = ".abund";
968                                 }
969                                 
970                                 map<string, string> outputStrings;
971                                 map<string, string>::iterator itout;
972                                 for (int i = 0; i < names.size(); i++) {
973                                         
974                                         string group = groupMap.getGroup(names[i]);
975                                         
976                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
977                                                 itout = outputStrings.find(group+rareAbund);
978                                                 if (itout == outputStrings.end()) {  
979                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
980                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
981                                         }else if(group == "not found") {
982                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
983                                         }
984                                 }
985                                 
986                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
987                         }
988                         
989                         
990                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
991                                 (*(filehandles[it3->first])).close();
992                 map<string, string> variables;
993                 variables["[filename]"] = fileroot;
994                 variables["[tag]"] = it3->first;
995                                 outputNames.push_back(getOutputFileName("name",variables));  outputTypes["name"].push_back(getOutputFileName("name",variables));
996                                 delete it3->second;
997                         }
998                 }
999                                 
1000                 return 0;
1001
1002         }
1003         catch(exception& e) {
1004                 m->errorOut(e, "SplitAbundCommand", "writeNames");
1005                 exit(1);
1006         }
1007 }
1008 /**********************************************************************************************************************/
1009 //just write the unique names - if a namesfile is given
1010 int SplitAbundCommand::writeAccnos(string tag) { 
1011         try {
1012                 
1013                 map<string, ofstream*> filehandles;
1014                 
1015                 if (Groups.size() == 0) {
1016                         ofstream aout;
1017                         ofstream rout;
1018                         
1019             map<string, string> variables;
1020             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(inputFile));
1021             variables["[tag]"] = tag;
1022             variables["[tag2]"] = "rare";
1023                         string rare = getOutputFileName("accnos",variables);
1024                         m->openOutputFile(rare, rout);
1025                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
1026                         
1027                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1028                                 rout << (*itRare) << endl;
1029                         }
1030                         rout.close();
1031                 
1032             variables["[tag2]"] = "abund";
1033                         string abund = getOutputFileName("accnos",variables);
1034                         m->openOutputFile(abund, aout);
1035                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
1036                         
1037                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1038                                 aout << (*itAbund) << endl;
1039                         }
1040                         aout.close();
1041                         
1042                 }else{ //parse names by abundance and group
1043                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
1044                         ofstream* temp;
1045                         ofstream* temp2;
1046                         map<string, ofstream*> filehandles;
1047                         map<string, ofstream*>::iterator it3;
1048                         
1049                         for (int i=0; i<Groups.size(); i++) {
1050                                 temp = new ofstream;
1051                                 filehandles[Groups[i]+".rare"] = temp;
1052                                 temp2 = new ofstream;
1053                                 filehandles[Groups[i]+".abund"] = temp2;
1054                                 
1055                 map<string, string> variables;
1056                 variables["[filename]"] = fileroot;
1057                 variables["[tag]"] = tag;
1058                 variables["[tag2]"] = "rare";
1059                 variables["[group]"] = Groups[i];
1060                                 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".rare"]));
1061                 variables["[tag2]"] = "abund";
1062                                 m->openOutputFile(getOutputFileName("accnos",variables), *(filehandles[Groups[i]+".abund"]));
1063                         }
1064                         
1065                         //write rare
1066                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
1067                                         string group = groupMap.getGroup(*itRare);
1068                                         
1069                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1070                                                 *(filehandles[group+".rare"]) << *itRare << endl;
1071                                         }
1072                         }
1073                                 
1074                         //write abund   
1075                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
1076                                         string group = groupMap.getGroup(*itAbund);
1077                                         
1078                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1079                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
1080                                         }
1081                         }
1082                         
1083                         //close files
1084                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1085                                 (*(filehandles[it3->first])).close();
1086                 map<string, string> variables;
1087                 variables["[filename]"] = fileroot;
1088                 variables["[tag]"] = tag;
1089                 variables["[tag2]"] = it3->first;
1090                                 outputNames.push_back(getOutputFileName("accnos",variables));  outputTypes["accnos"].push_back(getOutputFileName("accnos",variables));
1091                                 delete it3->second;
1092                         }
1093                 }
1094                                 
1095                 return 0;
1096
1097         }
1098         catch(exception& e) {
1099                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
1100                 exit(1);
1101         }
1102 }
1103 /**********************************************************************************************************************/
1104 int SplitAbundCommand::parseGroup(string tag) { //namefile
1105         try {
1106                 
1107                 map<string, ofstream*> filehandles;
1108         
1109                 if (Groups.size() == 0) {
1110                         ofstream aout;
1111                         ofstream rout;
1112                         
1113             map<string, string> variables;
1114             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(groupfile));
1115             variables["[tag]"] = tag;
1116             variables["[tag2]"] = "rare";
1117                         string rare = getOutputFileName("group",variables);
1118                         m->openOutputFile(rare, rout);
1119                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
1120                 
1121             variables["[tag2]"] = "abund";
1122                         string abund = getOutputFileName("group",variables);
1123 ;
1124                         m->openOutputFile(abund, aout);
1125                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
1126                         
1127                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
1128                                 vector<string> names;
1129                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
1130                                 
1131                                 for (int i = 0; i < names.size(); i++) {
1132                                 
1133                                         string group = groupMap.getGroup(names[i]);
1134                                 
1135                                         if (group == "not found") { 
1136                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
1137                                         }else {
1138                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
1139                                                         rout << names[i] << '\t' << group << endl;
1140                                                 }else{ //you are a abund name
1141                                                         aout << names[i] << '\t' << group << endl;
1142                                                 }
1143                                         }
1144                                 }
1145                         }
1146                         
1147                         rout.close(); 
1148                         aout.close(); 
1149
1150                 }else{ //parse names by abundance and group
1151                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
1152                         ofstream* temp;
1153                         ofstream* temp2;
1154                         map<string, ofstream*> filehandles;
1155                         map<string, ofstream*>::iterator it3;
1156
1157                         for (int i=0; i<Groups.size(); i++) {
1158                                 temp = new ofstream;
1159                                 filehandles[Groups[i]+".rare"] = temp;
1160                                 temp2 = new ofstream;
1161                                 filehandles[Groups[i]+".abund"] = temp2;
1162                                 
1163                 map<string, string> variables;
1164                 variables["[filename]"] = fileroot;
1165                 variables["[tag]"] = tag;
1166                 variables["[tag2]"] = "rare";
1167                 variables["[group]"] = Groups[i];
1168                                 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".rare"]));
1169                 variables["[tag2]"] = "abund";
1170                                 m->openOutputFile(getOutputFileName("group",variables), *(filehandles[Groups[i]+".abund"]));
1171                         }
1172                         
1173                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
1174                                 vector<string> names;
1175                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
1176                                 
1177                                 string rareAbund;
1178                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
1179                                         rareAbund = ".rare";
1180                                 }else{ //you are a abund name
1181                                         rareAbund = ".abund";
1182                                 }
1183                                 
1184                                 for (int i = 0; i < names.size(); i++) {
1185                                 
1186                                         string group = groupMap.getGroup(names[i]);
1187                                                                         
1188                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1189                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
1190                                         }
1191                                 }
1192                         }
1193                         
1194                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1195                                 (*(filehandles[it3->first])).close();
1196                 map<string, string> variables;
1197                 variables["[filename]"] = fileroot;
1198                 variables["[tag]"] = tag;
1199                 variables["[tag2]"] = it3->first;
1200                                 outputNames.push_back(getOutputFileName("group",variables));  outputTypes["group"].push_back(getOutputFileName("group",variables));
1201                                 delete it3->second;
1202                         }
1203                 }
1204                                 
1205                 return 0;
1206
1207         }
1208         catch(exception& e) {
1209                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
1210                 exit(1);
1211         }
1212 }
1213 /**********************************************************************************************************************/
1214 int SplitAbundCommand::parseFasta(string tag) { //namefile
1215         try {
1216                 
1217                 map<string, ofstream*> filehandles;
1218                 
1219                 if (Groups.size() == 0) {
1220                         ofstream aout;
1221                         ofstream rout;
1222                         
1223             map<string, string> variables;
1224             variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(fastafile));
1225             variables["[tag]"] = tag;
1226             variables["[tag2]"] = "rare";
1227                         string rare = getOutputFileName("fasta",variables);
1228                         m->openOutputFile(rare, rout);
1229                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
1230                 
1231             variables["[tag2]"] = "abund";
1232                         string abund = getOutputFileName("fasta",variables);
1233                         m->openOutputFile(abund, aout);
1234                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
1235                 
1236                         //open input file
1237                         ifstream in;
1238                         m->openInputFile(fastafile, in);
1239         
1240                         while (!in.eof()) {
1241                                 if (m->control_pressed) { break; }
1242                 
1243                                 Sequence seq(in); m->gobble(in);
1244                                 
1245                                 if (seq.getName() != "") { 
1246                                         
1247                                         map<string, string>::iterator itNames;
1248                                         
1249                                         itNames = nameMap.find(seq.getName());
1250                                         
1251                                         if (itNames == nameMap.end()) {
1252                                                 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1253                                         }else{
1254                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
1255                                                         seq.printSequence(rout);
1256                                                 }else{ //you are a abund name
1257                                                         seq.printSequence(aout);
1258                                                 }
1259                                         }
1260                                 }
1261                         }
1262                         in.close();
1263                         rout.close(); 
1264                         aout.close(); 
1265
1266                 }else{ //parse names by abundance and group
1267                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
1268                         ofstream* temp;
1269                         ofstream* temp2;
1270                         map<string, ofstream*> filehandles;
1271                         map<string, ofstream*>::iterator it3;
1272
1273                         for (int i=0; i<Groups.size(); i++) {
1274                                 temp = new ofstream;
1275                                 filehandles[Groups[i]+".rare"] = temp;
1276                                 temp2 = new ofstream;
1277                                 filehandles[Groups[i]+".abund"] = temp2;
1278                                 
1279                 map<string, string> variables;
1280                 variables["[filename]"] = fileroot;
1281                 variables["[tag]"] = tag;
1282                 variables["[tag2]"] = "rare";
1283                 variables["[group]"] = Groups[i];
1284                                 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".rare"]));
1285                 variables["[tag2]"] = "abund";
1286                                 m->openOutputFile(getOutputFileName("fasta",variables), *(filehandles[Groups[i]+".abund"]));
1287                         }
1288                         
1289                         //open input file
1290                         ifstream in;
1291                         m->openInputFile(fastafile, in);
1292         
1293                         while (!in.eof()) {
1294                                 if (m->control_pressed) { break; }
1295                 
1296                                 Sequence seq(in); m->gobble(in);
1297                                 
1298                                 if (seq.getName() != "") { 
1299                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
1300                                         
1301                                         if (itNames == nameMap.end()) {
1302                                                 m->mothurOut(seq.getName() + " is not in your names or list file, ignoring."); m->mothurOutEndLine();
1303                                         }else{
1304                                                 vector<string> names;
1305                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
1306                                 
1307                                                 string rareAbund;
1308                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1309                                                         rareAbund = ".rare";
1310                                                 }else{ //you are a abund name
1311                                                         rareAbund = ".abund";
1312                                                 }
1313                         
1314                         if (countfile == "") {
1315                             for (int i = 0; i < names.size(); i++) {
1316                                 string group = groupMap.getGroup(seq.getName());
1317                                 
1318                                 if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1319                                     seq.printSequence(*(filehandles[group+rareAbund]));
1320                                 }else if(group == "not found") {
1321                                     m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1322                                 }
1323                             }
1324                         }else {
1325                             vector<string> thisSeqsGroups = ct.getGroups(names[0]); //we only need names[0], because there is no namefile
1326                             for (int i = 0; i < thisSeqsGroups.size(); i++) {
1327                                 if (m->inUsersGroups(thisSeqsGroups[i], Groups)) { //only add if this is in a group we want
1328                                     seq.printSequence(*(filehandles[thisSeqsGroups[i]+rareAbund]));
1329                                 }
1330                             }
1331                         }
1332                                         }
1333                                 }
1334                         }
1335                         in.close();
1336                         
1337                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1338                                 (*(filehandles[it3->first])).close();
1339                 map<string, string> variables;
1340                 variables["[filename]"] = fileroot;
1341                 variables["[tag]"] = tag;
1342                 variables["[tag2]"] = it3->first;
1343                                 outputNames.push_back(getOutputFileName("fasta",variables));  outputTypes["fasta"].push_back(getOutputFileName("fasta",variables));
1344                                 delete it3->second;
1345                         }
1346                 }
1347                                 
1348                 return 0;
1349
1350         }
1351         catch(exception& e) {
1352                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1353                 exit(1);
1354         }
1355 }
1356 /**********************************************************************************************************************/
1357