]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added pipeline commands which involved change to command factory and command class...
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::getValidParameters(){ 
14         try {
15                 string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"};
16                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
17                 return myArray;
18         }
19         catch(exception& e) {
20                 m->errorOut(e, "SplitAbundCommand", "getValidParameters");
21                 exit(1);
22         }
23 }
24 //**********************************************************************************************************************
25 SplitAbundCommand::SplitAbundCommand(){ 
26         try {
27                 //initialize outputTypes
28                 vector<string> tempOutNames;
29                 outputTypes["list"] = tempOutNames;
30                 outputTypes["name"] = tempOutNames;
31                 outputTypes["accnos"] = tempOutNames;
32                 outputTypes["group"] = tempOutNames;
33                 outputTypes["fasta"] = tempOutNames;
34         }
35         catch(exception& e) {
36                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
37                 exit(1);
38         }
39 }
40 //**********************************************************************************************************************
41 vector<string> SplitAbundCommand::getRequiredParameters(){      
42         try {
43                 string Array[] =  {"fasta","list","name","or"};
44                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
45                 return myArray;
46         }
47         catch(exception& e) {
48                 m->errorOut(e, "SplitAbundCommand", "getRequiredParameters");
49                 exit(1);
50         }
51 }
52 //**********************************************************************************************************************
53 vector<string> SplitAbundCommand::getRequiredFiles(){   
54         try {
55                 vector<string> myArray;
56                 return myArray;
57         }
58         catch(exception& e) {
59                 m->errorOut(e, "SplitAbundCommand", "getRequiredFiles");
60                 exit(1);
61         }
62 }
63 //**********************************************************************************************************************
64 SplitAbundCommand::SplitAbundCommand(string option)  {
65         try {
66                 abort = false;
67                 allLines = 1;
68                         
69                 //allow user to run help
70                 if(option == "help") { help(); abort = true; }
71                 
72                 else {
73                         //valid paramters for this command
74                         string Array[] =  {"name","group","list","label","accnos","groups","fasta","cutoff","outputdir","inputdir"}; //
75                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
76                         
77                         OptionParser parser(option);
78                         map<string, string> parameters = parser.getParameters();
79                         
80                         ValidParameters validParameter;
81                         map<string, string>::iterator it;
82                 
83                         //check to make sure all parameters are valid for command
84                         for (it = parameters.begin(); it != parameters.end(); it++) { 
85                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
86                         }
87                         
88                         //initialize outputTypes
89                         vector<string> tempOutNames;
90                         outputTypes["list"] = tempOutNames;
91                         outputTypes["name"] = tempOutNames;
92                         outputTypes["accnos"] = tempOutNames;
93                         outputTypes["group"] = tempOutNames;
94                         outputTypes["fasta"] = tempOutNames;                    
95                                                                                                 
96                         //if the user changes the input directory command factory will send this info to us in the output parameter 
97                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
98                         if (inputDir == "not found"){   inputDir = "";          }
99                         else {
100                                 string path;
101                                 it = parameters.find("list");
102                                 //user has given a template file
103                                 if(it != parameters.end()){ 
104                                         path = m->hasPath(it->second);
105                                         //if the user has not given a path then, add inputdir. else leave path alone.
106                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
107                                 }
108                                 
109                                 it = parameters.find("group");
110                                 //user has given a template file
111                                 if(it != parameters.end()){ 
112                                         path = m->hasPath(it->second);
113                                         //if the user has not given a path then, add inputdir. else leave path alone.
114                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
115                                 }
116                                 
117                                 it = parameters.find("fasta");
118                                 //user has given a template file
119                                 if(it != parameters.end()){ 
120                                         path = m->hasPath(it->second);
121                                         //if the user has not given a path then, add inputdir. else leave path alone.
122                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
123                                 }
124                                 
125                                 it = parameters.find("name");
126                                 //user has given a template file
127                                 if(it != parameters.end()){ 
128                                         path = m->hasPath(it->second);
129                                         //if the user has not given a path then, add inputdir. else leave path alone.
130                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
131                                 }
132
133                         }
134
135                         
136                         //if the user changes the output directory command factory will send this info to us in the output parameter 
137                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
138
139                         //check for required parameters
140                         listfile = validParameter.validFile(parameters, "list", true);
141                         if (listfile == "not open") { abort = true; }
142                         else if (listfile == "not found") { listfile = ""; }
143                         else{ inputFile = listfile; }   
144                         
145                         namefile = validParameter.validFile(parameters, "name", true);
146                         if (namefile == "not open") { abort = true; }
147                         else if (namefile == "not found") { namefile = ""; }    
148                         else{ inputFile = namefile; }   
149                 
150                         fastafile = validParameter.validFile(parameters, "fasta", true);
151                         if (fastafile == "not open") { abort = true; }
152                         else if (fastafile == "not found") { fastafile = ""; m->mothurOut("fasta is a required parameter for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }       
153                         
154                         groupfile = validParameter.validFile(parameters, "group", true);
155                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
156                         else if (groupfile == "not found") { groupfile = ""; }
157                         else {  
158                                 groupMap = new GroupMap(groupfile);
159                                 
160                                 int error = groupMap->readMap();
161                                 if (error == 1) { abort = true; }
162         
163                         }
164                         
165                         groups = validParameter.validFile(parameters, "groups", false);         
166                         if (groups == "not found") { groups = ""; }
167                         else if (groups == "all") { 
168                                 if (groupfile != "") {  Groups = groupMap->namesOfGroups;  } 
169                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
170                         }else { 
171                                 m->splitAtDash(groups, Groups);
172                         }
173                         
174                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
175                         
176                         //do you have all files needed
177                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
178                         
179                         //check for optional parameter and set defaults
180                         // ...at some point should added some additional type checking...
181                         label = validParameter.validFile(parameters, "label", false);                   
182                         if (label == "not found") { label = "";  allLines = 1; }
183                         else { 
184                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
185                                 else { allLines = 1;  }
186                         }
187                         
188                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
189                         accnos = m->isTrue(temp); 
190                         
191                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
192                         convert(temp, cutoff); 
193
194                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
195
196                 }
197
198         }
199         catch(exception& e) {
200                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
201                 exit(1);
202         }
203 }
204 //**********************************************************************************************************************
205 void SplitAbundCommand::help(){
206         try {
207                 m->mothurOut("The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n");
208                 m->mothurOut("The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n");
209                 m->mothurOut("The fasta and a list or name parameter are required, and you must provide a cutoff value.\n");
210                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
211                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
212                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
213                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
214                 m->mothurOut("The groups parameter allows you to parse the files into rare and abundant files by group.  \n");
215                 m->mothurOut("For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n");
216                 m->mothurOut("If you want .abund and .rare files for all groups, set groups=all.  \n");
217                 m->mothurOut("The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
218                 m->mothurOut("Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
219                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
220
221         }
222         catch(exception& e) {
223                 m->errorOut(e, "SplitAbundCommand", "help");
224                 exit(1);
225         }
226 }
227 //**********************************************************************************************************************
228 SplitAbundCommand::~SplitAbundCommand(){ 
229         if (groupfile != "") {  delete groupMap;  } 
230 }
231 //**********************************************************************************************************************
232 int SplitAbundCommand::execute(){
233         try {
234         
235                 if (abort == true) {    return 0;       }
236                 
237                 if (listfile != "") { //you are using a listfile to determine abundance
238                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
239                 
240                         //remove old files so you can append later....
241                         string fileroot = outputDir + m->getRootName(m->getSimpleName(listfile));
242                         if (Groups.size() == 0) {
243                                 remove((fileroot + "rare.list").c_str());
244                                 remove((fileroot + "abund.list").c_str());
245                                 
246                                 outputNames.push_back((fileroot + "rare.list"));
247                                 outputNames.push_back((fileroot + "abund.list"));
248                                 outputTypes["list"].push_back((fileroot + "rare.list"));
249                                 outputTypes["list"].push_back((fileroot + "abund.list"));
250                         }else{
251                                 for (int i=0; i<Groups.size(); i++) {
252                                         remove((fileroot + Groups[i] + ".rare.list").c_str());
253                                         remove((fileroot + Groups[i] + ".abund.list").c_str());
254                                         
255                                         outputNames.push_back((fileroot + Groups[i] + ".rare.list"));
256                                         outputNames.push_back((fileroot + Groups[i] + ".abund.list"));
257                                         outputTypes["list"].push_back((fileroot + Groups[i] + ".rare.list"));
258                                         outputTypes["list"].push_back((fileroot + Groups[i] + ".abund.list"));
259                                 }
260                         }
261                         
262                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
263                         set<string> processedLabels;
264                         set<string> userLabels = labels;        
265                         
266                         input = new InputData(listfile, "list");
267                         list = input->getListVector();
268                         string lastLabel = list->getLabel();
269                         
270                         //do you have a namefile or do we need to similate one?
271                         if (namefile != "") {  readNamesFile();         }
272                         else                            { createNameMap(list);  }
273                         
274                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
275                         
276                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
277                         
278                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
279                                 
280                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
281                                                 
282                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
283                                                 splitList(list);
284                                                                                         
285                                                 processedLabels.insert(list->getLabel());
286                                                 userLabels.erase(list->getLabel());
287                                 }
288                                 
289                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
290                                                 string saveLabel = list->getLabel();
291                                                 
292                                                 delete list;
293                                                 list = input->getListVector(lastLabel); //get new list vector to process
294                                                 
295                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
296                                                 splitList(list);
297                                                 
298                                                 processedLabels.insert(list->getLabel());
299                                                 userLabels.erase(list->getLabel());
300                                                 
301                                                 //restore real lastlabel to save below
302                                                 list->setLabel(saveLabel);
303                                 }
304                                 
305                         
306                                 lastLabel = list->getLabel();
307                                         
308                                 delete list;
309                                 list = input->getListVector(); //get new list vector to process
310                         }
311                         
312                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
313                         
314                         //output error messages about any remaining user labels
315                         set<string>::iterator it;
316                         bool needToRun = false;
317                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
318                                 m->mothurOut("Your file does not include the label " + *it); 
319                                 if (processedLabels.count(lastLabel) != 1) {
320                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
321                                         needToRun = true;
322                                 }else {
323                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
324                                 }
325
326                         }
327                         
328                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
329                         
330                         //run last label if you need to
331                         if (needToRun == true)  {
332                                 if (list != NULL) {     delete list;    }
333                                 list = input->getListVector(lastLabel); //get new list vector to process
334                                 
335                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
336                                 splitList(list);                
337                                 
338                                 delete list;
339                         }
340                         
341                         delete input;
342                         
343                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
344                                                                         
345                 }else { //you are using the namefile to determine abundance
346                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
347                         
348                         splitNames(); 
349                         writeNames();
350                         
351                         string tag = "";
352                         if (groupfile != "")                            {  parseGroup(tag);             }
353                         if (accnos)                                                     {  writeAccnos(tag);    }
354                         if (fastafile != "")                            {  parseFasta(tag);             }
355                 }
356
357                 m->mothurOutEndLine();
358                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
359                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
360                 m->mothurOutEndLine();
361                 
362                 return 0;
363         }
364         catch(exception& e) {
365                 m->errorOut(e, "SplitAbundCommand", "execute");
366                 exit(1);
367         }
368 }
369 /**********************************************************************************************************************/
370 int SplitAbundCommand::splitList(ListVector* thisList) {
371         try {
372                 rareNames.clear();
373                 abundNames.clear();
374                 
375                 //get rareNames and abundNames
376                 for (int i = 0; i < thisList->getNumBins(); i++) {
377                         if (m->control_pressed) { return 0; }
378                         
379                         string bin = thisList->get(i);
380                                                 
381                         vector<string> names;
382                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
383                         int size = names.size();
384                                 
385                         if (size <= cutoff) {
386                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
387                         }else{
388                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
389                         }
390                 }//end for
391
392                 writeList(thisList);
393                 
394                 string tag = thisList->getLabel() + ".";
395                 if (groupfile != "")                            {  parseGroup(tag);             }
396                 if (accnos)                                                     {  writeAccnos(tag);    }
397                 if (fastafile != "")                            {  parseFasta(tag);             }
398                 
399                 return 0;
400
401         }
402         catch(exception& e) {
403                 m->errorOut(e, "SplitAbundCommand", "splitList");
404                 exit(1);
405         }
406 }
407 /**********************************************************************************************************************/
408 int SplitAbundCommand::writeList(ListVector* thisList) { 
409         try {
410                 
411                 map<string, ofstream*> filehandles;
412                 
413                 if (Groups.size() == 0) {
414                         SAbundVector* sabund = new SAbundVector();
415                         *sabund = thisList->getSAbundVector();
416                 
417                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
418                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
419                         int numRareBins = 0;
420                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
421                                 if (i > cutoff) { break; }
422                                 numRareBins += sabund->get(i);
423                         }
424                         int numAbundBins = thisList->getNumBins() - numRareBins;
425                         delete sabund;
426
427                         ofstream aout;
428                         ofstream rout;
429                         
430                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile))  + "rare.list";
431                         m->openOutputFileAppend(rare, rout);
432                         //outputNames.push_back(rare);
433                         
434                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile))  + "abund.list";
435                         m->openOutputFileAppend(abund, aout);
436                         //outputNames.push_back(abund);
437
438                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
439                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
440
441                         for (int i = 0; i < thisList->getNumBins(); i++) {
442                                 if (m->control_pressed) { break; }
443                         
444                                 string bin = list->get(i); 
445                         
446                                 int size = m->getNumNames(bin);
447                         
448                                 if (size <= cutoff) {  rout << bin << '\t';  }
449                                 else                            {  aout << bin << '\t'; }
450                         }
451                         
452                         if (rareNames.size() != 0)      { rout << endl; }
453                         if (abundNames.size() != 0) { aout << endl; }
454                         
455                         rout.close();
456                         aout.close();
457                         
458                 }else{ //parse names by abundance and group
459                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
460                         ofstream* temp;
461                         ofstream* temp2;
462                         //map<string, bool> wroteFile;
463                         map<string, ofstream*> filehandles;
464                         map<string, ofstream*>::iterator it3;
465
466                         for (int i=0; i<Groups.size(); i++) {
467                                 temp = new ofstream;
468                                 filehandles[Groups[i]+".rare"] = temp;
469                                 temp2 = new ofstream;
470                                 filehandles[Groups[i]+".abund"] = temp2;
471                                 
472                                 m->openOutputFileAppend(fileroot + Groups[i] + ".rare.list", *(filehandles[Groups[i]+".rare"]));
473                                 m->openOutputFileAppend(fileroot + Groups[i] + ".abund.list", *(filehandles[Groups[i]+".abund"]));
474                         }
475                         
476                         map<string, string> groupVector;
477                         map<string, string>::iterator itGroup;
478                         map<string, int> groupNumBins;
479                 
480                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
481                                 groupNumBins[it3->first] = 0;
482                                 groupVector[it3->first] = "";
483                         }
484                 
485                         for (int i = 0; i < thisList->getNumBins(); i++) {
486                                 if (m->control_pressed) { break; }
487                         
488                                 map<string, string> groupBins;
489                                 string bin = list->get(i); 
490                         
491                                 vector<string> names;
492                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
493                         
494                                 //parse bin into list of sequences in each group
495                                 for (int j = 0; j < names.size(); j++) {
496                                         string rareAbund;
497                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
498                                                 rareAbund = ".rare";
499                                         }else{ //you are a abund name
500                                                 rareAbund = ".abund";
501                                         }
502                                         
503                                         string group = groupMap->getGroup(names[j]);
504                                 
505                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
506                                                 itGroup = groupBins.find(group+rareAbund);
507                                                 if(itGroup == groupBins.end()) {
508                                                         groupBins[group+rareAbund] = names[j];  //add first name
509                                                         groupNumBins[group+rareAbund]++;
510                                                 }else{ //add another name
511                                                         groupBins[group+rareAbund] +=  "," + names[j];
512                                                 }
513                                         }else if(group == "not found") {
514                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
515                                         }
516                                 }
517                         
518                         
519                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
520                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
521                                 }
522                         }
523                         
524                         //end list vector
525                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
526                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
527                                 (*(filehandles[it3->first])).close();
528                                 delete it3->second;
529                         }
530                 }
531                 
532                 return 0;
533
534         }
535         catch(exception& e) {
536                 m->errorOut(e, "SplitAbundCommand", "writeList");
537                 exit(1);
538         }
539 }
540 /**********************************************************************************************************************/
541 int SplitAbundCommand::splitNames() { //namefile
542         try {
543                 
544                 rareNames.clear();
545                 abundNames.clear();     
546                         
547                 //open input file
548                 ifstream in;
549                 m->openInputFile(namefile, in);
550                 
551                 while (!in.eof()) {
552                         if (m->control_pressed) { break; }
553                         
554                         string firstCol, secondCol;
555                         in >> firstCol >> secondCol; m->gobble(in);
556                         
557                         nameMap[firstCol] = secondCol;
558                         
559                         int size = m->getNumNames(secondCol);
560                                 
561                         if (size <= cutoff) {
562                                 rareNames.insert(firstCol); 
563                         }else{
564                                 abundNames.insert(firstCol); 
565                         }
566                 }
567                 in.close();
568                                 
569                 return 0;
570
571         }
572         catch(exception& e) {
573                 m->errorOut(e, "SplitAbundCommand", "splitNames");
574                 exit(1);
575         }
576 }
577 /**********************************************************************************************************************/
578 int SplitAbundCommand::readNamesFile() { 
579         try {
580                 //open input file
581                 ifstream in;
582                 m->openInputFile(namefile, in);
583                 
584                 while (!in.eof()) {
585                         if (m->control_pressed) { break; }
586                         
587                         string firstCol, secondCol;
588                         in >> firstCol >> secondCol; m->gobble(in);
589                         
590                         nameMap[firstCol] = secondCol;
591                 }
592                 in.close();
593                                 
594                 return 0;
595
596         }
597         catch(exception& e) {
598                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
599                 exit(1);
600         }
601 }
602 /**********************************************************************************************************************/
603 int SplitAbundCommand::createNameMap(ListVector* thisList) {
604         try {
605                 
606                 if (thisList != NULL) {
607                         for (int i = 0; i < thisList->getNumBins(); i++) {
608                                 if (m->control_pressed) { return 0; }
609                                 
610                                 string bin = thisList->get(i);
611                                                         
612                                 vector<string> names;
613                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
614                                 
615                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
616                         }//end for
617                 }
618                 
619                 return 0;
620         }
621         catch(exception& e) {
622                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
623                 exit(1);
624         }
625 }
626 /**********************************************************************************************************************/
627 int SplitAbundCommand::writeNames() { //namefile
628         try {
629                 
630                 map<string, ofstream*> filehandles;
631
632                 if (Groups.size() == 0) {
633                         ofstream aout;
634                         ofstream rout;
635                         
636                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare.names";
637                         m->openOutputFile(rare, rout);
638                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
639                         
640                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund.names";
641                         m->openOutputFile(abund, aout);
642                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
643                         
644                         if (rareNames.size() != 0) {
645                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
646                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
647                                 }
648                         }
649                         rout.close();
650                         
651                         if (abundNames.size() != 0) {
652                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
653                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
654                                 }
655                         }
656                         aout.close();
657                         
658                 }else{ //parse names by abundance and group
659                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
660                         ofstream* temp;
661                         ofstream* temp2;
662                         map<string, ofstream*> filehandles;
663                         map<string, ofstream*>::iterator it3;
664
665                         for (int i=0; i<Groups.size(); i++) {
666                                 temp = new ofstream;
667                                 filehandles[Groups[i]+".rare"] = temp;
668                                 temp2 = new ofstream;
669                                 filehandles[Groups[i]+".abund"] = temp2;
670                                 
671                                 m->openOutputFile(fileroot + Groups[i] + ".rare.names", *(filehandles[Groups[i]+".rare"]));
672                                 m->openOutputFile(fileroot + Groups[i] + ".abund.names", *(filehandles[Groups[i]+".abund"]));
673                         }
674                         
675                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
676                                 vector<string> names;
677                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
678                                 
679                                 string rareAbund;
680                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
681                                                 rareAbund = ".rare";
682                                 }else{ //you are a abund name
683                                                 rareAbund = ".abund";
684                                 }
685                                 
686                                 map<string, string> outputStrings;
687                                 map<string, string>::iterator itout;
688                                 for (int i = 0; i < names.size(); i++) {
689                                         
690                                         string group = groupMap->getGroup(names[i]);
691                                         
692                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
693                                                 itout = outputStrings.find(group+rareAbund);
694                                                 if (itout == outputStrings.end()) {  
695                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
696                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
697                                         }else if(group == "not found") {
698                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
699                                         }
700                                 }
701                                 
702                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
703                         }
704                         
705                         
706                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
707                                 (*(filehandles[it3->first])).close();
708                                 outputNames.push_back(fileroot + it3->first + ".names");  outputTypes["name"].push_back(fileroot + it3->first + ".names");
709                                 delete it3->second;
710                         }
711                 }
712                                 
713                 return 0;
714
715         }
716         catch(exception& e) {
717                 m->errorOut(e, "SplitAbundCommand", "writeNames");
718                 exit(1);
719         }
720 }
721 /**********************************************************************************************************************/
722 //just write the unique names - if a namesfile is given
723 int SplitAbundCommand::writeAccnos(string tag) { 
724         try {
725                 
726                 map<string, ofstream*> filehandles;
727                 
728                 if (Groups.size() == 0) {
729                         ofstream aout;
730                         ofstream rout;
731                         
732                         
733                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare.accnos";
734                         m->openOutputFile(rare, rout);
735                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
736                         
737                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
738                                 rout << (*itRare) << endl;
739                         }
740                         rout.close();
741                 
742                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund.accnos";
743                         m->openOutputFile(abund, aout);
744                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
745                         
746                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
747                                 aout << (*itAbund) << endl;
748                         }
749                         aout.close();
750                         
751                 }else{ //parse names by abundance and group
752                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
753                         ofstream* temp;
754                         ofstream* temp2;
755                         map<string, ofstream*> filehandles;
756                         map<string, ofstream*>::iterator it3;
757                         
758                         for (int i=0; i<Groups.size(); i++) {
759                                 temp = new ofstream;
760                                 filehandles[Groups[i]+".rare"] = temp;
761                                 temp2 = new ofstream;
762                                 filehandles[Groups[i]+".abund"] = temp2;
763                                 
764                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.accnos", *(filehandles[Groups[i]+".rare"]));
765                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.accnos", *(filehandles[Groups[i]+".abund"]));
766                         }
767                         
768                         //write rare
769                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
770                                         string group = groupMap->getGroup(*itRare);
771                                         
772                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
773                                                 *(filehandles[group+".rare"]) << *itRare << endl;
774                                         }
775                         }
776                                 
777                         //write abund   
778                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
779                                         string group = groupMap->getGroup(*itAbund);
780                                         
781                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
782                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
783                                         }
784                         }
785                         
786                         //close files
787                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
788                                 (*(filehandles[it3->first])).close();
789                                 outputNames.push_back(fileroot + tag + it3->first + ".accnos");  outputTypes["accnos"].push_back(fileroot + tag + it3->first + ".accnos");
790                                 delete it3->second;
791                         }
792                 }
793                                 
794                 return 0;
795
796         }
797         catch(exception& e) {
798                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
799                 exit(1);
800         }
801 }
802 /**********************************************************************************************************************/
803 int SplitAbundCommand::parseGroup(string tag) { //namefile
804         try {
805                 
806                 map<string, ofstream*> filehandles;
807         
808                 if (Groups.size() == 0) {
809                         ofstream aout;
810                         ofstream rout;
811                         
812                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare.groups";
813                         m->openOutputFile(rare, rout);
814                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
815                 
816                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund.groups";
817                         m->openOutputFile(abund, aout);
818                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
819                         
820                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
821                                 vector<string> names;
822                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
823                                 
824                                 for (int i = 0; i < names.size(); i++) {
825                                 
826                                         string group = groupMap->getGroup(names[i]);
827                                 
828                                         if (group == "not found") { 
829                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
830                                         }else {
831                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
832                                                         rout << names[i] << '\t' << group << endl;
833                                                 }else{ //you are a abund name
834                                                         aout << names[i] << '\t' << group << endl;
835                                                 }
836                                         }
837                                 }
838                         }
839                         
840                         rout.close(); 
841                         aout.close(); 
842
843                 }else{ //parse names by abundance and group
844                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
845                         ofstream* temp;
846                         ofstream* temp2;
847                         map<string, ofstream*> filehandles;
848                         map<string, ofstream*>::iterator it3;
849
850                         for (int i=0; i<Groups.size(); i++) {
851                                 temp = new ofstream;
852                                 filehandles[Groups[i]+".rare"] = temp;
853                                 temp2 = new ofstream;
854                                 filehandles[Groups[i]+".abund"] = temp2;
855                                 
856                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.groups", *(filehandles[Groups[i]+".rare"]));
857                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.groups", *(filehandles[Groups[i]+".abund"]));
858                         }
859                         
860                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
861                                 vector<string> names;
862                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
863                                 
864                                 string rareAbund;
865                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
866                                         rareAbund = ".rare";
867                                 }else{ //you are a abund name
868                                         rareAbund = ".abund";
869                                 }
870                                 
871                                 for (int i = 0; i < names.size(); i++) {
872                                 
873                                         string group = groupMap->getGroup(names[i]);
874                                                                         
875                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
876                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
877                                         }
878                                 }
879                         }
880                         
881                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
882                                 (*(filehandles[it3->first])).close();
883                                 outputNames.push_back(fileroot + tag + it3->first + ".groups");  outputTypes["group"].push_back(fileroot + tag + it3->first + ".groups");
884                                 delete it3->second;
885                         }
886                 }
887                                 
888                 return 0;
889
890         }
891         catch(exception& e) {
892                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
893                 exit(1);
894         }
895 }
896 /**********************************************************************************************************************/
897 int SplitAbundCommand::parseFasta(string tag) { //namefile
898         try {
899                 
900                 map<string, ofstream*> filehandles;
901                 
902                 if (Groups.size() == 0) {
903                         ofstream aout;
904                         ofstream rout;
905                         
906                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare.fasta";
907                         m->openOutputFile(rare, rout);
908                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
909                 
910                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund.fasta";
911                         m->openOutputFile(abund, aout);
912                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
913                 
914                         //open input file
915                         ifstream in;
916                         m->openInputFile(fastafile, in);
917         
918                         while (!in.eof()) {
919                                 if (m->control_pressed) { break; }
920                 
921                                 Sequence seq(in); m->gobble(in);
922                                 
923                                 if (seq.getName() != "") { 
924                                         
925                                         map<string, string>::iterator itNames;
926                                         
927                                         itNames = nameMap.find(seq.getName());
928                                         
929                                         if (itNames == nameMap.end()) {
930                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
931                                         }else{
932                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
933                                                         seq.printSequence(rout);
934                                                 }else{ //you are a abund name
935                                                         seq.printSequence(aout);
936                                                 }
937                                         }
938                                 }
939                         }
940                         in.close();
941                         rout.close(); 
942                         aout.close(); 
943
944                 }else{ //parse names by abundance and group
945                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
946                         ofstream* temp;
947                         ofstream* temp2;
948                         map<string, ofstream*> filehandles;
949                         map<string, ofstream*>::iterator it3;
950
951                         for (int i=0; i<Groups.size(); i++) {
952                                 temp = new ofstream;
953                                 filehandles[Groups[i]+".rare"] = temp;
954                                 temp2 = new ofstream;
955                                 filehandles[Groups[i]+".abund"] = temp2;
956                                 
957                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare.fasta", *(filehandles[Groups[i]+".rare"]));
958                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund.fasta", *(filehandles[Groups[i]+".abund"]));
959                         }
960                         
961                         //open input file
962                         ifstream in;
963                         m->openInputFile(fastafile, in);
964         
965                         while (!in.eof()) {
966                                 if (m->control_pressed) { break; }
967                 
968                                 Sequence seq(in); m->gobble(in);
969                                 
970                                 if (seq.getName() != "") { 
971                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
972                                         
973                                         if (itNames == nameMap.end()) {
974                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
975                                         }else{
976                                                 vector<string> names;
977                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
978                                 
979                                                 string rareAbund;
980                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
981                                                         rareAbund = ".rare";
982                                                 }else{ //you are a abund name
983                                                         rareAbund = ".abund";
984                                                 }
985                                 
986                                                 for (int i = 0; i < names.size(); i++) {
987                                 
988                                                         string group = groupMap->getGroup(seq.getName());
989                                         
990                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
991                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
992                                                         }else if(group == "not found") {
993                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
994                                                         }
995                                                 }
996                                         }
997                                 }
998                         }
999                         in.close();
1000                         
1001                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1002                                 (*(filehandles[it3->first])).close();
1003                                 outputNames.push_back(fileroot + tag + it3->first + ".fasta");  outputTypes["fasta"].push_back(fileroot + tag + it3->first + ".fasta");
1004                                 delete it3->second;
1005                         }
1006                 }
1007                                 
1008                 return 0;
1009
1010         }
1011         catch(exception& e) {
1012                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1013                 exit(1);
1014         }
1015 }
1016 /**********************************************************************************************************************/
1017