]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
fixed kendall method in corr.axes
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){ 
14         try {
15                 string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){ 
26         try {
27                 abort = true;
28                 //initialize outputTypes
29                 vector<string> tempOutNames;
30                 outputTypes["list"] = tempOutNames;
31                 outputTypes["name"] = tempOutNames;
32                 outputTypes["accnos"] = tempOutNames;
33                 outputTypes["group"] = tempOutNames;
34                 outputTypes["fasta"] = tempOutNames;
35         }
36         catch(exception& e) {
37                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
38                 exit(1);
39         }
40 }
41 //**********************************************************************************************************************
42 vector<string> SplitAbundCommand::getRequiredParameters(){      
43         try {
44                 string Array[] =  {"fasta","list","name","or"};
45                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
46                 return myArray;
47         }
48         catch(exception& e) {
49                 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
50                 exit(1);
51         }
52 }
53 //**********************************************************************************************************************
54 vector<string> SplitAbundCommand::getRequiredFiles(){   
55         try {
56                 vector<string> myArray;
57                 return myArray;
58         }
59         catch(exception& e) {
60                 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
61                 exit(1);
62         }
63 }
64 //**********************************************************************************************************************
65 SplitAbundCommand::SplitAbundCommand(string option)  {
66         try {
67                 abort = false;
68                 allLines = 1;
69                         
70                 //allow user to run help
71                 if(option == "help") { help(); abort = true; }
72                 
73                 else {
74                         //valid paramters for this command
75                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
76                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
77                         
78                         OptionParser parser(option);
79                         map<string, string> parameters = parser.getParameters();
80                         
81                         ValidParameters validParameter;
82                         map<string, string>::iterator it;
83                 
84                         //check to make sure all parameters are valid for command
85                         for (it = parameters.begin(); it != parameters.end(); it++) { 
86                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
87                         }
88                         
89                         //initialize outputTypes
90                         vector<string> tempOutNames;
91                         outputTypes["list"] = tempOutNames;
92                         outputTypes["name"] = tempOutNames;
93                         outputTypes["accnos"] = tempOutNames;
94                         outputTypes["group"] = tempOutNames;
95                         outputTypes["fasta"] = tempOutNames;                    
96                                                                                                 
97                         //if the user changes the input directory command factory will send this info to us in the output parameter 
98                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
99                         if (inputDir == "not found"){   inputDir = "";          }
100                         else {
101                                 string path;
102                                 it = parameters.find("list");
103                                 //user has given a template file
104                                 if(it != parameters.end()){ 
105                                         path = m->hasPath(it->second);
106                                         //if the user has not given a path then, add inputdir. else leave path alone.
107                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
108                                 }
109                                 
110                                 it = parameters.find("group");
111                                 //user has given a template file
112                                 if(it != parameters.end()){ 
113                                         path = m->hasPath(it->second);
114                                         //if the user has not given a path then, add inputdir. else leave path alone.
115                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
116                                 }
117                                 
118                                 it = parameters.find("fasta");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
124                                 }
125                                 
126                                 it = parameters.find("name");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
132                                 }
133
134                         }
135
136                         
137                         //if the user changes the output directory command factory will send this info to us in the output parameter 
138                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
139
140                         //check for required parameters
141                         listfile = validParameter.validFile(parameters, "list", true);
142                         if (listfile == "not open") { abort = true; }
143                         else if (listfile == "not found") { listfile = ""; }
144                         else{ inputFile = listfile; }   
145                         
146                         namefile = validParameter.validFile(parameters, "name", true);
147                         if (namefile == "not open") { abort = true; }
148                         else if (namefile == "not found") { namefile = ""; }    
149                         else{ inputFile = namefile; }   
150                 
151                         fastafile = validParameter.validFile(parameters, "fasta", true);
152                         if (fastafile == "not open") { abort = true; }
153                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
154                         
155                         groupfile = validParameter.validFile(parameters, "group", true);
156                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
157                         else if (groupfile == "not found") { groupfile = ""; }
158                         else {  
159                                 groupMap = new GroupMap(groupfile);
160                                 
161                                 int error = groupMap->readMap();
162                                 if (error == 1) { abort = true; }
163         
164                         }
165                         
166                         groups = validParameter.validFile(parameters, "groups", false);         
167                         if (groups == "not found") { groups = ""; }
168                         else if (groups == "all") { 
169                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
170                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
171                         }else { 
172                                 m->splitAtDash(groups, Groups);
173                         }
174                         
175                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
176                         
177                         //do you have all files needed
178                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
179                         
180                         //check for optional parameter and set defaults
181                         // ...at some point should added some additional type checking...
182                         label = validParameter.validFile(parameters, "label", false);                   
183                         if (label == "not found") { label = "";  allLines = 1; }
184                         else { 
185                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
186                                 else { allLines = 1;  }
187                         }
188                         
189                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
190                         accnos = m->isTrue(temp); 
191                         
192                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
193                         convert(temp, cutoff); 
194
195                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
196
197                 }
198
199         }
200         catch(exception& e) {
201                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
202                 exit(1);
203         }
204 }
205 //**********************************************************************************************************************
206 void SplitAbundCommand::help(){
207         try {
208                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
209                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
210                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
211                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
212                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
213                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
214                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
215                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
216                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
217                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
218                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
219                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
220                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
221
222         }
223         catch(exception& e) {
224                 m->errorOut(e, "SplitAbundCommand", "help");
225                 exit(1);
226         }
227 }
228 //**********************************************************************************************************************
229 SplitAbundCommand::~SplitAbundCommand(){ 
230         if (groupfile != "") {  delete groupMap;  } 
231 }
232 //**********************************************************************************************************************
233 int SplitAbundCommand::execute(){
234         try {
235         
236                 if (abort == true) {    return 0;       }
237                 
238                 if (listfile != "") { //you are using a listfile to determine abundance
239                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
240                         
241                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
242                         set<string> processedLabels;
243                         set<string> userLabels = labels;        
244                         
245                         input = new InputData(listfile, "list");
246                         list = input->getListVector();
247                         string lastLabel = list->getLabel();
248                         
249                         //do you have a namefile or do we need to similate one?
250                         if (namefile != "") {  readNamesFile();         }
251                         else                            { createNameMap(list);  }
252                         
253                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
254                         
255                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
256                         
257                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
258                                 
259                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
260                                                 
261                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
262                                                 splitList(list);
263                                                                                         
264                                                 processedLabels.insert(list->getLabel());
265                                                 userLabels.erase(list->getLabel());
266                                 }
267                                 
268                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
269                                                 string saveLabel = list->getLabel();
270                                                 
271                                                 delete list;
272                                                 list = input->getListVector(lastLabel); //get new list vector to process
273                                                 
274                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
275                                                 splitList(list);
276                                                 
277                                                 processedLabels.insert(list->getLabel());
278                                                 userLabels.erase(list->getLabel());
279                                                 
280                                                 //restore real lastlabel to save below
281                                                 list->setLabel(saveLabel);
282                                 }
283                                 
284                         
285                                 lastLabel = list->getLabel();
286                                         
287                                 delete list;
288                                 list = input->getListVector(); //get new list vector to process
289                         }
290                         
291                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
292                         
293                         //output error messages about any remaining user labels
294                         set<string>::iterator it;
295                         bool needToRun = false;
296                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
297                                 m->mothurOut("Your file does not include the label " + *it); 
298                                 if (processedLabels.count(lastLabel) != 1) {
299                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
300                                         needToRun = true;
301                                 }else {
302                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
303                                 }
304
305                         }
306                         
307                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
308                         
309                         //run last label if you need to
310                         if (needToRun == true)  {
311                                 if (list != NULL) {     delete list;    }
312                                 list = input->getListVector(lastLabel); //get new list vector to process
313                                 
314                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
315                                 splitList(list);                
316                                 
317                                 delete list;
318                         }
319                         
320                         delete input;
321                         
322                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
323                                                                         
324                 }else { //you are using the namefile to determine abundance
325                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
326                         
327                         splitNames(); 
328                         writeNames();
329                         
330                         string tag = "";
331                         if (groupfile != "")                            {  parseGroup(tag);             }
332                         if (accnos)                                                     {  writeAccnos(tag);    }
333                         if (fastafile != "")                            {  parseFasta(tag);             }
334                 }
335                 
336                 m->mothurOutEndLine();
337                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
338                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
339                 m->mothurOutEndLine();
340                 
341                 return 0;
342         }
343         catch(exception& e) {
344                 m->errorOut(e, "SplitAbundCommand", "execute");
345                 exit(1);
346         }
347 }
348 /**********************************************************************************************************************/
349 int SplitAbundCommand::splitList(ListVector* thisList) {
350         try {
351                 rareNames.clear();
352                 abundNames.clear();
353                 
354                 //get rareNames and abundNames
355                 for (int i = 0; i < thisList->getNumBins(); i++) {
356                         if (m->control_pressed) { return 0; }
357                         
358                         string bin = thisList->get(i);
359                                                 
360                         vector<string> names;
361                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
362                         int size = names.size();
363                                 
364                         if (size <= cutoff) {
365                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
366                         }else{
367                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
368                         }
369                 }//end for
370
371                 
372                 string tag = thisList->getLabel() + ".";
373                 
374                 writeList(thisList, tag);
375                 
376                 if (groupfile != "")                            {  parseGroup(tag);             }
377                 if (accnos)                                                     {  writeAccnos(tag);    }
378                 if (fastafile != "")                            {  parseFasta(tag);             }
379                 
380                 return 0;
381
382         }
383         catch(exception& e) {
384                 m->errorOut(e, "SplitAbundCommand", "splitList");
385                 exit(1);
386         }
387 }
388 /**********************************************************************************************************************/
389 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
390         try {
391                 
392                 map<string, ofstream*> filehandles;
393                 
394                 if (Groups.size() == 0) {
395                         SAbundVector* sabund = new SAbundVector();
396                         *sabund = thisList->getSAbundVector();
397                 
398                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
399                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
400                         int numRareBins = 0;
401                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
402                                 if (i > cutoff) { break; }
403                                 numRareBins += sabund->get(i);
404                         }
405                         int numAbundBins = thisList->getNumBins() - numRareBins;
406                         delete sabund;
407
408                         ofstream aout;
409                         ofstream rout;
410                         
411                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare.list";
412                         m->openOutputFile(rare, rout);
413                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
414                         
415                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund.list";
416                         m->openOutputFile(abund, aout);
417                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
418
419                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
420                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
421
422                         for (int i = 0; i < thisList->getNumBins(); i++) {
423                                 if (m->control_pressed) { break; }
424                         
425                                 string bin = list->get(i); 
426                         
427                                 int size = m->getNumNames(bin);
428                         
429                                 if (size <= cutoff) {  rout << bin << '\t';  }
430                                 else                            {  aout << bin << '\t'; }
431                         }
432                         
433                         if (rareNames.size() != 0)      { rout << endl; }
434                         if (abundNames.size() != 0) { aout << endl; }
435                         
436                         rout.close();
437                         aout.close();
438                         
439                 }else{ //parse names by abundance and group
440                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
441                         ofstream* temp;
442                         ofstream* temp2;
443                         //map<string, bool> wroteFile;
444                         map<string, ofstream*> filehandles;
445                         map<string, ofstream*>::iterator it3;
446
447                         for (int i=0; i<Groups.size(); i++) {
448                                 temp = new ofstream;
449                                 filehandles[Groups[i]+".rare"] = temp;
450                                 temp2 = new ofstream;
451                                 filehandles[Groups[i]+".abund"] = temp2;
452                                 
453                                 m->openOutputFile(fileroot + Groups[i] + tag + ".rare.list", *(filehandles[Groups[i]+".rare"]));
454                                 m->openOutputFile(fileroot + Groups[i] + tag + ".abund.list", *(filehandles[Groups[i]+".abund"]));
455                                 outputNames.push_back(fileroot + Groups[i] + tag + ".rare.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".rare.list");
456                                 outputNames.push_back(fileroot + Groups[i] + tag + ".abund.list"); outputTypes["list"].push_back(fileroot + Groups[i] + tag + ".abund.list");
457                         }
458                         
459                         map<string, string> groupVector;
460                         map<string, string>::iterator itGroup;
461                         map<string, int> groupNumBins;
462                 
463                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
464                                 groupNumBins[it3->first] = 0;
465                                 groupVector[it3->first] = "";
466                         }
467                 
468                         for (int i = 0; i < thisList->getNumBins(); i++) {
469                                 if (m->control_pressed) { break; }
470                         
471                                 map<string, string> groupBins;
472                                 string bin = list->get(i); 
473                         
474                                 vector<string> names;
475                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
476                         
477                                 //parse bin into list of sequences in each group
478                                 for (int j = 0; j < names.size(); j++) {
479                                         string rareAbund;
480                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
481                                                 rareAbund = ".rare";
482                                         }else{ //you are a abund name
483                                                 rareAbund = ".abund";
484                                         }
485                                         
486                                         string group = groupMap->getGroup(names[j]);
487                                 
488                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
489                                                 itGroup = groupBins.find(group+rareAbund);
490                                                 if(itGroup == groupBins.end()) {
491                                                         groupBins[group+rareAbund] = names[j];  //add first name
492                                                         groupNumBins[group+rareAbund]++;
493                                                 }else{ //add another name
494                                                         groupBins[group+rareAbund] +=  "," + names[j];
495                                                 }
496                                         }else if(group == "not found") {
497                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
498                                         }
499                                 }
500                         
501                         
502                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
503                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
504                                 }
505                         }
506                         
507                         //end list vector
508                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
509                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
510                                 (*(filehandles[it3->first])).close();
511                                 delete it3->second;
512                         }
513                 }
514                 
515                 return 0;
516
517         }
518         catch(exception& e) {
519                 m->errorOut(e, "SplitAbundCommand", "writeList");
520                 exit(1);
521         }
522 }
523 /**********************************************************************************************************************/
524 int SplitAbundCommand::splitNames() { //namefile
525         try {
526                 
527                 rareNames.clear();
528                 abundNames.clear();     
529                         
530                 //open input file
531                 ifstream in;
532                 m->openInputFile(namefile, in);
533                 
534                 while (!in.eof()) {
535                         if (m->control_pressed) { break; }
536                         
537                         string firstCol, secondCol;
538                         in >> firstCol >> secondCol; m->gobble(in);
539                         
540                         nameMap[firstCol] = secondCol;
541                         
542                         int size = m->getNumNames(secondCol);
543                                 
544                         if (size <= cutoff) {
545                                 rareNames.insert(firstCol); 
546                         }else{
547                                 abundNames.insert(firstCol); 
548                         }
549                 }
550                 in.close();
551                                 
552                 return 0;
553
554         }
555         catch(exception& e) {
556                 m->errorOut(e, "SplitAbundCommand", "splitNames");
557                 exit(1);
558         }
559 }
560 /**********************************************************************************************************************/
561 int SplitAbundCommand::readNamesFile() { 
562         try {
563                 //open input file
564                 ifstream in;
565                 m->openInputFile(namefile, in);
566                 
567                 while (!in.eof()) {
568                         if (m->control_pressed) { break; }
569                         
570                         string firstCol, secondCol;
571                         in >> firstCol >> secondCol; m->gobble(in);
572                         
573                         nameMap[firstCol] = secondCol;
574                 }
575                 in.close();
576                                 
577                 return 0;
578
579         }
580         catch(exception& e) {
581                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
582                 exit(1);
583         }
584 }
585 /**********************************************************************************************************************/
586 int SplitAbundCommand::createNameMap(ListVector* thisList) {
587         try {
588                 
589                 if (thisList != NULL) {
590                         for (int i = 0; i < thisList->getNumBins(); i++) {
591                                 if (m->control_pressed) { return 0; }
592                                 
593                                 string bin = thisList->get(i);
594                                                         
595                                 vector<string> names;
596                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
597                                 
598                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
599                         }//end for
600                 }
601                 
602                 return 0;
603         }
604         catch(exception& e) {
605                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
606                 exit(1);
607         }
608 }
609 /**********************************************************************************************************************/
610 int SplitAbundCommand::writeNames() { //namefile
611         try {
612                 
613                 map<string, ofstream*> filehandles;
614
615                 if (Groups.size() == 0) {
616                         ofstream aout;
617                         ofstream rout;
618                         
619                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
620                         m->openOutputFile(rare, rout);
621                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
622                         
623                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
624                         m->openOutputFile(abund, aout);
625                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
626                         
627                         if (rareNames.size() != 0) {
628                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
629                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
630                                 }
631                         }
632                         rout.close();
633                         
634                         if (abundNames.size() != 0) {
635                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
636                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
637                                 }
638                         }
639                         aout.close();
640                         
641                 }else{ //parse names by abundance and group
642                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
643                         ofstream* temp;
644                         ofstream* temp2;
645                         map<string, ofstream*> filehandles;
646                         map<string, ofstream*>::iterator it3;
647
648                         for (int i=0; i<Groups.size(); i++) {
649                                 temp = new ofstream;
650                                 filehandles[Groups[i]+".rare"] = temp;
651                                 temp2 = new ofstream;
652                                 filehandles[Groups[i]+".abund"] = temp2;
653                                 
654                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
655                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
656                         }
657                         
658                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
659                                 vector<string> names;
660                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
661                                 
662                                 string rareAbund;
663                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
664                                                 rareAbund = ".rare";
665                                 }else{ //you are a abund name
666                                                 rareAbund = ".abund";
667                                 }
668                                 
669                                 map<string, string> outputStrings;
670                                 map<string, string>::iterator itout;
671                                 for (int i = 0; i < names.size(); i++) {
672                                         
673                                         string group = groupMap->getGroup(names[i]);
674                                         
675                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
676                                                 itout = outputStrings.find(group+rareAbund);
677                                                 if (itout == outputStrings.end()) {  
678                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
679                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
680                                         }else if(group == "not found") {
681                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
682                                         }
683                                 }
684                                 
685                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
686                         }
687                         
688                         
689                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
690                                 (*(filehandles[it3->first])).close();
691                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
692                                 delete it3->second;
693                         }
694                 }
695                                 
696                 return 0;
697
698         }
699         catch(exception& e) {
700                 m->errorOut(e, "SplitAbundCommand", "writeNames");
701                 exit(1);
702         }
703 }
704 /**********************************************************************************************************************/
705 //just write the unique names - if a namesfile is given
706 int SplitAbundCommand::writeAccnos(string tag) { 
707         try {
708                 
709                 map<string, ofstream*> filehandles;
710                 
711                 if (Groups.size() == 0) {
712                         ofstream aout;
713                         ofstream rout;
714                         
715                         
716                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
717                         m->openOutputFile(rare, rout);
718                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
719                         
720                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
721                                 rout << (*itRare) << endl;
722                         }
723                         rout.close();
724                 
725                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
726                         m->openOutputFile(abund, aout);
727                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
728                         
729                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
730                                 aout << (*itAbund) << endl;
731                         }
732                         aout.close();
733                         
734                 }else{ //parse names by abundance and group
735                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
736                         ofstream* temp;
737                         ofstream* temp2;
738                         map<string, ofstream*> filehandles;
739                         map<string, ofstream*>::iterator it3;
740                         
741                         for (int i=0; i<Groups.size(); i++) {
742                                 temp = new ofstream;
743                                 filehandles[Groups[i]+".rare"] = temp;
744                                 temp2 = new ofstream;
745                                 filehandles[Groups[i]+".abund"] = temp2;
746                                 
747                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
748                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
749                         }
750                         
751                         //write rare
752                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
753                                         string group = groupMap->getGroup(*itRare);
754                                         
755                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
756                                                 *(filehandles[group+".rare"]) << *itRare << endl;
757                                         }
758                         }
759                                 
760                         //write abund   
761                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
762                                         string group = groupMap->getGroup(*itAbund);
763                                         
764                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
765                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
766                                         }
767                         }
768                         
769                         //close files
770                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
771                                 (*(filehandles[it3->first])).close();
772                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
773                                 delete it3->second;
774                         }
775                 }
776                                 
777                 return 0;
778
779         }
780         catch(exception& e) {
781                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
782                 exit(1);
783         }
784 }
785 /**********************************************************************************************************************/
786 int SplitAbundCommand::parseGroup(string tag) { //namefile
787         try {
788                 
789                 map<string, ofstream*> filehandles;
790         
791                 if (Groups.size() == 0) {
792                         ofstream aout;
793                         ofstream rout;
794                         
795                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
796                         m->openOutputFile(rare, rout);
797                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
798                 
799                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
800                         m->openOutputFile(abund, aout);
801                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
802                         
803                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
804                                 vector<string> names;
805                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
806                                 
807                                 for (int i = 0; i < names.size(); i++) {
808                                 
809                                         string group = groupMap->getGroup(names[i]);
810                                 
811                                         if (group == "not found") { 
812                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
813                                         }else {
814                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
815                                                         rout << names[i] << '\t' << group << endl;
816                                                 }else{ //you are a abund name
817                                                         aout << names[i] << '\t' << group << endl;
818                                                 }
819                                         }
820                                 }
821                         }
822                         
823                         rout.close(); 
824                         aout.close(); 
825
826                 }else{ //parse names by abundance and group
827                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
828                         ofstream* temp;
829                         ofstream* temp2;
830                         map<string, ofstream*> filehandles;
831                         map<string, ofstream*>::iterator it3;
832
833                         for (int i=0; i<Groups.size(); i++) {
834                                 temp = new ofstream;
835                                 filehandles[Groups[i]+".rare"] = temp;
836                                 temp2 = new ofstream;
837                                 filehandles[Groups[i]+".abund"] = temp2;
838                                 
839                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
840                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
841                         }
842                         
843                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
844                                 vector<string> names;
845                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
846                                 
847                                 string rareAbund;
848                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
849                                         rareAbund = ".rare";
850                                 }else{ //you are a abund name
851                                         rareAbund = ".abund";
852                                 }
853                                 
854                                 for (int i = 0; i < names.size(); i++) {
855                                 
856                                         string group = groupMap->getGroup(names[i]);
857                                                                         
858                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
859                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
860                                         }
861                                 }
862                         }
863                         
864                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
865                                 (*(filehandles[it3->first])).close();
866                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
867                                 delete it3->second;
868                         }
869                 }
870                                 
871                 return 0;
872
873         }
874         catch(exception& e) {
875                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
876                 exit(1);
877         }
878 }
879 /**********************************************************************************************************************/
880 int SplitAbundCommand::parseFasta(string tag) { //namefile
881         try {
882                 
883                 map<string, ofstream*> filehandles;
884                 
885                 if (Groups.size() == 0) {
886                         ofstream aout;
887                         ofstream rout;
888                         
889                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
890                         m->openOutputFile(rare, rout);
891                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
892                 
893                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
894                         m->openOutputFile(abund, aout);
895                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
896                 
897                         //open input file
898                         ifstream in;
899                         m->openInputFile(fastafile, in);
900         
901                         while (!in.eof()) {
902                                 if (m->control_pressed) { break; }
903                 
904                                 Sequence seq(in); m->gobble(in);
905                                 
906                                 if (seq.getName() != "") { 
907                                         
908                                         map<string, string>::iterator itNames;
909                                         
910                                         itNames = nameMap.find(seq.getName());
911                                         
912                                         if (itNames == nameMap.end()) {
913                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
914                                         }else{
915                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
916                                                         seq.printSequence(rout);
917                                                 }else{ //you are a abund name
918                                                         seq.printSequence(aout);
919                                                 }
920                                         }
921                                 }
922                         }
923                         in.close();
924                         rout.close(); 
925                         aout.close(); 
926
927                 }else{ //parse names by abundance and group
928                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
929                         ofstream* temp;
930                         ofstream* temp2;
931                         map<string, ofstream*> filehandles;
932                         map<string, ofstream*>::iterator it3;
933
934                         for (int i=0; i<Groups.size(); i++) {
935                                 temp = new ofstream;
936                                 filehandles[Groups[i]+".rare"] = temp;
937                                 temp2 = new ofstream;
938                                 filehandles[Groups[i]+".abund"] = temp2;
939                                 
940                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
941                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
942                         }
943                         
944                         //open input file
945                         ifstream in;
946                         m->openInputFile(fastafile, in);
947         
948                         while (!in.eof()) {
949                                 if (m->control_pressed) { break; }
950                 
951                                 Sequence seq(in); m->gobble(in);
952                                 
953                                 if (seq.getName() != "") { 
954                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
955                                         
956                                         if (itNames == nameMap.end()) {
957                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
958                                         }else{
959                                                 vector<string> names;
960                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
961                                 
962                                                 string rareAbund;
963                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
964                                                         rareAbund = ".rare";
965                                                 }else{ //you are a abund name
966                                                         rareAbund = ".abund";
967                                                 }
968                                 
969                                                 for (int i = 0; i < names.size(); i++) {
970                                 
971                                                         string group = groupMap->getGroup(seq.getName());
972                                         
973                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
974                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
975                                                         }else if(group == "not found") {
976                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
977                                                         }
978                                                 }
979                                         }
980                                 }
981                         }
982                         in.close();
983                         
984                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
985                                 (*(filehandles[it3->first])).close();
986                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
987                                 delete it3->second;
988                         }
989                 }
990                                 
991                 return 0;
992
993         }
994         catch(exception& e) {
995                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
996                 exit(1);
997         }
998 }
999 /**********************************************************************************************************************/
1000