]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added load.logfile command. changed summary.single output for subsample=t.
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 vector<string> SplitAbundCommand::setParameters(){      
14         try {           
15                 CommandParameter pfasta("fasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16                 CommandParameter pname("name", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(pname);
17                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
18                 CommandParameter plist("list", "InputTypes", "", "", "none", "FNGLT", "none",false,false); parameters.push_back(plist);
19                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
20                 CommandParameter pcutoff("cutoff", "Number", "", "0", "", "", "",false,true); parameters.push_back(pcutoff);
21                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
22                 CommandParameter paccnos("accnos", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(paccnos);
23                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
24                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
25                 
26                 vector<string> myArray;
27                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
28                 return myArray;
29         }
30         catch(exception& e) {
31                 m->errorOut(e, "SplitAbundCommand", "setParameters");
32                 exit(1);
33         }
34 }
35 //**********************************************************************************************************************
36 string SplitAbundCommand::getHelpString(){      
37         try {
38                 string helpString = "";
39                 helpString += "The split.abund command reads a fasta file and a list or a names file splits the sequences into rare and abundant groups. \n";
40                 helpString += "The split.abund command parameters are fasta, list, name, cutoff, group, label, groups, cutoff and accnos.\n";
41                 helpString += "The fasta and a list or name parameter are required, and you must provide a cutoff value.\n";
42                 helpString += "The cutoff parameter is used to qualify what is abundant and rare.\n";
43                 helpString += "The group parameter allows you to parse a group file into rare and abundant groups.\n";
44                 helpString += "The label parameter is used to read specific labels in your listfile you want to use.\n";
45                 helpString += "The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n";
46                 helpString += "The groups parameter allows you to parse the files into rare and abundant files by group.  \n";
47                 helpString += "For example if you set groups=A-B-C, you will get a .A.abund, .A.rare, .B.abund, .B.rare, .C.abund, .C.rare files.  \n";
48                 helpString += "If you want .abund and .rare files for all groups, set groups=all.  \n";
49                 helpString += "The split.abund command should be used in the following format: split.abund(fasta=yourFasta, list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n";
50                 helpString += "Example: split.abund(fasta=abrecovery.fasta, list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n";
51                 helpString += "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n";
52                 return helpString;
53         }
54         catch(exception& e) {
55                 m->errorOut(e, "SplitAbundCommand", "getHelpString");
56                 exit(1);
57         }
58 }
59 //**********************************************************************************************************************
60 string SplitAbundCommand::getOutputFileNameTag(string type, string inputName=""){       
61         try {
62         string outputFileName = "";
63                 map<string, vector<string> >::iterator it;
64         
65         //is this a type this command creates
66         it = outputTypes.find(type);
67         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
68         else {
69             if (type == "fasta")            {   outputFileName =  "fasta";   }
70             else if (type == "list")    {   outputFileName =  "list";   }
71             else if (type == "name")        {   outputFileName =  "names";   }
72             else if (type == "group")       {   outputFileName =  "groups";   }
73             else if (type == "accnos")        {   outputFileName =  "accnos";   }
74             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
75         }
76         return outputFileName;
77         }
78         catch(exception& e) {
79                 m->errorOut(e, "SplitAbundCommand", "getOutputFileNameTag");
80                 exit(1);
81         }
82 }
83 //**********************************************************************************************************************
84 SplitAbundCommand::SplitAbundCommand(){ 
85         try {
86                 abort = true; calledHelp = true; 
87                 setParameters();
88                 vector<string> tempOutNames;
89                 outputTypes["list"] = tempOutNames;
90                 outputTypes["name"] = tempOutNames;
91                 outputTypes["accnos"] = tempOutNames;
92                 outputTypes["group"] = tempOutNames;
93                 outputTypes["fasta"] = tempOutNames;
94         }
95         catch(exception& e) {
96                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
97                 exit(1);
98         }
99 }
100 //**********************************************************************************************************************
101 SplitAbundCommand::SplitAbundCommand(string option)  {
102         try {
103                 abort = false; calledHelp = false;   
104                 allLines = 1;
105                         
106                 //allow user to run help
107                 if(option == "help") { help(); abort = true; calledHelp = true; }
108                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
109                 else {
110                         vector<string> myArray = setParameters();
111                         
112                         OptionParser parser(option);
113                         map<string, string> parameters = parser.getParameters();
114                         
115                         ValidParameters validParameter;
116                         map<string, string>::iterator it;
117                 
118                         //check to make sure all parameters are valid for command
119                         for (it = parameters.begin(); it != parameters.end(); it++) { 
120                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
121                         }
122                         
123                         //initialize outputTypes
124                         vector<string> tempOutNames;
125                         outputTypes["list"] = tempOutNames;
126                         outputTypes["name"] = tempOutNames;
127                         outputTypes["accnos"] = tempOutNames;
128                         outputTypes["group"] = tempOutNames;
129                         outputTypes["fasta"] = tempOutNames;                    
130                                                                                                 
131                         //if the user changes the input directory command factory will send this info to us in the output parameter 
132                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
133                         if (inputDir == "not found"){   inputDir = "";          }
134                         else {
135                                 string path;
136                                 it = parameters.find("list");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
142                                 }
143                                 
144                                 it = parameters.find("group");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
150                                 }
151                                 
152                                 it = parameters.find("fasta");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167
168                         }
169
170                         
171                         //if the user changes the output directory command factory will send this info to us in the output parameter 
172                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
173
174                         //check for required parameters
175                         listfile = validParameter.validFile(parameters, "list", true);
176                         if (listfile == "not open") { abort = true; }
177                         else if (listfile == "not found") { listfile = ""; }
178                         else{ inputFile = listfile; m->setListFile(listfile); } 
179                         
180                         namefile = validParameter.validFile(parameters, "name", true);
181                         if (namefile == "not open") { abort = true; }
182                         else if (namefile == "not found") { namefile = ""; }    
183                         else{ inputFile = namefile; m->setNameFile(namefile); } 
184                 
185                         fastafile = validParameter.validFile(parameters, "fasta", true);
186                         if (fastafile == "not open") { abort = true; }
187                         else if (fastafile == "not found") {                            
188                                 fastafile = m->getFastaFile(); 
189                                 if (fastafile != "") { m->mothurOut("Using " + fastafile + " as input file for the fasta parameter."); m->mothurOutEndLine(); }
190                                 else {  m->mothurOut("You have no current fastafile and the fasta parameter is required."); m->mothurOutEndLine(); abort = true; }
191                         }else { m->setFastaFile(fastafile); }   
192                         
193                         groupfile = validParameter.validFile(parameters, "group", true);
194                         if (groupfile == "not open") {  groupfile = ""; abort = true; } 
195                         else if (groupfile == "not found") { groupfile = ""; }
196                         else {  
197                                 groupMap = new GroupMap(groupfile);
198                                 
199                                 int error = groupMap->readMap();
200                                 if (error == 1) { abort = true; }
201                                 m->setGroupFile(groupfile);
202                         }
203                         
204                         groups = validParameter.validFile(parameters, "groups", false);         
205                         if (groups == "not found") { groups = ""; }
206                         else if (groups == "all") { 
207                                 if (groupfile != "") {  Groups = groupMap->getNamesOfGroups();  } 
208                                 else {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";   }
209                         }else { 
210                                 m->splitAtDash(groups, Groups);
211                         }
212                         
213                         if ((groupfile == "") && (groups != "")) {  m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = "";  Groups.clear(); }
214                         
215                         //do you have all files needed
216                         if ((listfile == "") && (namefile == "")) { 
217                                 namefile = m->getNameFile(); 
218                                 if (namefile != "") { m->mothurOut("Using " + namefile + " as input file for the name parameter."); m->mothurOutEndLine(); }
219                                 else {                          
220                                         listfile = m->getListFile(); 
221                                         if (listfile != "") { m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
222                                         else {  m->mothurOut("You have no current list or namefile and the list or name parameter is required."); m->mothurOutEndLine(); abort = true; }
223                                 }
224                         }
225                         
226                         //check for optional parameter and set defaults
227                         // ...at some point should added some additional type checking...
228                         label = validParameter.validFile(parameters, "label", false);                   
229                         if (label == "not found") { label = "";  allLines = 1; }
230                         else { 
231                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
232                                 else { allLines = 1;  }
233                         }
234                         
235                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
236                         accnos = m->isTrue(temp); 
237                         
238                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
239                         m->mothurConvert(temp, cutoff); 
240
241                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
242                 }
243
244         }
245         catch(exception& e) {
246                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
247                 exit(1);
248         }
249 }
250 //**********************************************************************************************************************
251 SplitAbundCommand::~SplitAbundCommand(){ 
252         if (groupfile != "") {  delete groupMap;  } 
253 }
254 //**********************************************************************************************************************
255 int SplitAbundCommand::execute(){
256         try {
257         
258                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
259                 
260                 if (listfile != "") { //you are using a listfile to determine abundance
261                         if (outputDir == "") { outputDir = m->hasPath(listfile); }
262                         
263                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
264                         set<string> processedLabels;
265                         set<string> userLabels = labels;        
266                         
267                         input = new InputData(listfile, "list");
268                         list = input->getListVector();
269                         string lastLabel = list->getLabel();
270                         
271                         //do you have a namefile or do we need to similate one?
272                         if (namefile != "") {  readNamesFile();         }
273                         else                            { createNameMap(list);  }
274                         
275                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); } return 0; }
276                         
277                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
278                         
279                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     m->mothurRemove(outputNames[i]); } return 0; }
280                                 
281                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
282                                                 
283                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
284                                                 splitList(list);
285                                                                                         
286                                                 processedLabels.insert(list->getLabel());
287                                                 userLabels.erase(list->getLabel());
288                                 }
289                                 
290                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
291                                                 string saveLabel = list->getLabel();
292                                                 
293                                                 delete list;
294                                                 list = input->getListVector(lastLabel); //get new list vector to process
295                                                 
296                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
297                                                 splitList(list);
298                                                 
299                                                 processedLabels.insert(list->getLabel());
300                                                 userLabels.erase(list->getLabel());
301                                                 
302                                                 //restore real lastlabel to save below
303                                                 list->setLabel(saveLabel);
304                                 }
305                                 
306                         
307                                 lastLabel = list->getLabel();
308                                         
309                                 delete list;
310                                 list = input->getListVector(); //get new list vector to process
311                         }
312                         
313                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
314                         
315                         //output error messages about any remaining user labels
316                         set<string>::iterator it;
317                         bool needToRun = false;
318                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
319                                 m->mothurOut("Your file does not include the label " + *it); 
320                                 if (processedLabels.count(lastLabel) != 1) {
321                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
322                                         needToRun = true;
323                                 }else {
324                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
325                                 }
326
327                         }
328                         
329                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
330                         
331                         //run last label if you need to
332                         if (needToRun == true)  {
333                                 if (list != NULL) {     delete list;    }
334                                 list = input->getListVector(lastLabel); //get new list vector to process
335                                 
336                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
337                                 splitList(list);                
338                                 
339                                 delete list;
340                         }
341                         
342                         delete input;
343                         
344                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }      return 0;       }
345                                                                         
346                 }else { //you are using the namefile to determine abundance
347                         if (outputDir == "") { outputDir = m->hasPath(namefile); }
348                         
349                         splitNames(); 
350                         writeNames();
351                         
352                         string tag = "";
353                         if (groupfile != "")                            {  parseGroup(tag);             }
354                         if (accnos)                                                     {  writeAccnos(tag);    }
355                         if (fastafile != "")                            {  parseFasta(tag);             }
356                 }
357                 
358                 //set fasta file as new current fastafile
359                 string current = "";
360                 itTypes = outputTypes.find("fasta");
361                 if (itTypes != outputTypes.end()) {
362                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setFastaFile(current); }
363                 }
364                 
365                 itTypes = outputTypes.find("name");
366                 if (itTypes != outputTypes.end()) {
367                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setNameFile(current); }
368                 }
369                 
370                 itTypes = outputTypes.find("group");
371                 if (itTypes != outputTypes.end()) {
372                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setGroupFile(current); }
373                 }
374                 
375                 itTypes = outputTypes.find("list");
376                 if (itTypes != outputTypes.end()) {
377                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setListFile(current); }
378                 }
379                 
380                 itTypes = outputTypes.find("accnos");
381                 if (itTypes != outputTypes.end()) {
382                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setAccnosFile(current); }
383                 }
384                 
385                 m->mothurOutEndLine();
386                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
387                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
388                 m->mothurOutEndLine();
389                 
390                 return 0;
391         }
392         catch(exception& e) {
393                 m->errorOut(e, "SplitAbundCommand", "execute");
394                 exit(1);
395         }
396 }
397 /**********************************************************************************************************************/
398 int SplitAbundCommand::splitList(ListVector* thisList) {
399         try {
400                 rareNames.clear();
401                 abundNames.clear();
402                 
403                 //get rareNames and abundNames
404                 for (int i = 0; i < thisList->getNumBins(); i++) {
405                         if (m->control_pressed) { return 0; }
406                         
407                         string bin = thisList->get(i);
408                                                 
409                         vector<string> names;
410                         m->splitAtComma(bin, names);  //parses bin into individual sequence names
411                         int size = names.size();
412                                 
413                         if (size <= cutoff) {
414                                 for (int j = 0; j < names.size(); j++) {  rareNames.insert(names[j]);  }
415                         }else{
416                                 for (int j = 0; j < names.size(); j++) {  abundNames.insert(names[j]);  }
417                         }
418                 }//end for
419
420                 
421                 string tag = thisList->getLabel() + ".";
422                 
423                 writeList(thisList, tag);
424                 
425                 if (groupfile != "")                            {  parseGroup(tag);             }
426                 if (accnos)                                                     {  writeAccnos(tag);    }
427                 if (fastafile != "")                            {  parseFasta(tag);             }
428                 
429                 return 0;
430
431         }
432         catch(exception& e) {
433                 m->errorOut(e, "SplitAbundCommand", "splitList");
434                 exit(1);
435         }
436 }
437 /**********************************************************************************************************************/
438 int SplitAbundCommand::writeList(ListVector* thisList, string tag) { 
439         try {
440                 
441                 map<string, ofstream*> filehandles;
442                 
443                 if (Groups.size() == 0) {
444                         SAbundVector* sabund = new SAbundVector();
445                         *sabund = thisList->getSAbundVector();
446                 
447                         //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
448                         // and don't have to store the bins until you are done with the whole vector, this save alot of space.
449                         int numRareBins = 0;
450                         for (int i = 0; i <= sabund->getMaxRank(); i++) {
451                                 if (i > cutoff) { break; }
452                                 numRareBins += sabund->get(i);
453                         }
454                         int numAbundBins = thisList->getNumBins() - numRareBins;
455                         delete sabund;
456
457                         ofstream aout;
458                         ofstream rout;
459                         
460                         string rare = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "rare." + getOutputFileNameTag("list");
461                         m->openOutputFile(rare, rout);
462                         outputNames.push_back(rare); outputTypes["list"].push_back(rare);
463                         
464                         string abund = outputDir + m->getRootName(m->getSimpleName(listfile)) + tag + "abund." + getOutputFileNameTag("list");
465                         m->openOutputFile(abund, aout);
466                         outputNames.push_back(abund); outputTypes["list"].push_back(abund);
467
468                         if (rareNames.size() != 0)      {  rout << thisList->getLabel() << '\t' << numRareBins << '\t';         }
469                         if (abundNames.size() != 0) {   aout << thisList->getLabel() << '\t' << numAbundBins << '\t';   }
470
471                         for (int i = 0; i < thisList->getNumBins(); i++) {
472                                 if (m->control_pressed) { break; }
473                         
474                                 string bin = list->get(i); 
475                         
476                                 int size = m->getNumNames(bin);
477                         
478                                 if (size <= cutoff) {  rout << bin << '\t';  }
479                                 else                            {  aout << bin << '\t'; }
480                         }
481                         
482                         if (rareNames.size() != 0)      { rout << endl; }
483                         if (abundNames.size() != 0) { aout << endl; }
484                         
485                         rout.close();
486                         aout.close();
487                         
488                 }else{ //parse names by abundance and group
489                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(listfile));
490                         ofstream* temp;
491                         ofstream* temp2;
492                         //map<string, bool> wroteFile;
493                         map<string, ofstream*> filehandles;
494                         map<string, ofstream*>::iterator it3;
495
496                         for (int i=0; i<Groups.size(); i++) {
497                                 temp = new ofstream;
498                                 filehandles[Groups[i]+".rare"] = temp;
499                                 temp2 = new ofstream;
500                                 filehandles[Groups[i]+".abund"] = temp2;
501                                 
502                 string rareGroupFileName = fileroot + Groups[i] + tag + ".rare." + getOutputFileNameTag("list");
503                 string abundGroupFileName = fileroot + Groups[i] + tag + ".abund." + getOutputFileNameTag("list");
504                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
505                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
506                                 outputNames.push_back(rareGroupFileName); outputTypes["list"].push_back(rareGroupFileName);
507                                 outputNames.push_back(abundGroupFileName); outputTypes["list"].push_back(abundGroupFileName);
508                         }
509                         
510                         map<string, string> groupVector;
511                         map<string, string>::iterator itGroup;
512                         map<string, int> groupNumBins;
513                 
514                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
515                                 groupNumBins[it3->first] = 0;
516                                 groupVector[it3->first] = "";
517                         }
518                 
519                         for (int i = 0; i < thisList->getNumBins(); i++) {
520                                 if (m->control_pressed) { break; }
521                         
522                                 map<string, string> groupBins;
523                                 string bin = list->get(i); 
524                         
525                                 vector<string> names;
526                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
527                         
528                                 //parse bin into list of sequences in each group
529                                 for (int j = 0; j < names.size(); j++) {
530                                         string rareAbund;
531                                         if (rareNames.count(names[j]) != 0) { //you are a rare name
532                                                 rareAbund = ".rare";
533                                         }else{ //you are a abund name
534                                                 rareAbund = ".abund";
535                                         }
536                                         
537                                         string group = groupMap->getGroup(names[j]);
538                                 
539                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
540                                                 itGroup = groupBins.find(group+rareAbund);
541                                                 if(itGroup == groupBins.end()) {
542                                                         groupBins[group+rareAbund] = names[j];  //add first name
543                                                         groupNumBins[group+rareAbund]++;
544                                                 }else{ //add another name
545                                                         groupBins[group+rareAbund] +=  "," + names[j];
546                                                 }
547                                         }else if(group == "not found") {
548                                                 m->mothurOut(names[j] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
549                                         }
550                                 }
551                         
552                         
553                                 for (itGroup = groupBins.begin(); itGroup != groupBins.end(); itGroup++) {
554                                         groupVector[itGroup->first] +=  itGroup->second + '\t'; 
555                                 }
556                         }
557                         
558                         //end list vector
559                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) {
560                                 (*(filehandles[it3->first])) << thisList->getLabel() << '\t' << groupNumBins[it3->first] << '\t' << groupVector[it3->first] << endl;  // label numBins  listvector for that group
561                                 (*(filehandles[it3->first])).close();
562                                 delete it3->second;
563                         }
564                 }
565                 
566                 return 0;
567
568         }
569         catch(exception& e) {
570                 m->errorOut(e, "SplitAbundCommand", "writeList");
571                 exit(1);
572         }
573 }
574 /**********************************************************************************************************************/
575 int SplitAbundCommand::splitNames() { //namefile
576         try {
577                 
578                 rareNames.clear();
579                 abundNames.clear();     
580                         
581                 //open input file
582                 ifstream in;
583                 m->openInputFile(namefile, in);
584                 
585                 while (!in.eof()) {
586                         if (m->control_pressed) { break; }
587                         
588                         string firstCol, secondCol;
589                         in >> firstCol >> secondCol; m->gobble(in);
590                         
591                         nameMap[firstCol] = secondCol;
592                         
593                         int size = m->getNumNames(secondCol);
594                                 
595                         if (size <= cutoff) {
596                                 rareNames.insert(firstCol); 
597                         }else{
598                                 abundNames.insert(firstCol); 
599                         }
600                 }
601                 in.close();
602                                 
603                 return 0;
604
605         }
606         catch(exception& e) {
607                 m->errorOut(e, "SplitAbundCommand", "splitNames");
608                 exit(1);
609         }
610 }
611 /**********************************************************************************************************************/
612 int SplitAbundCommand::readNamesFile() { 
613         try {
614                 //open input file
615                 ifstream in;
616                 m->openInputFile(namefile, in);
617                 
618                 while (!in.eof()) {
619                         if (m->control_pressed) { break; }
620                         
621                         string firstCol, secondCol;
622                         in >> firstCol >> secondCol; m->gobble(in);
623                         
624                         nameMap[firstCol] = secondCol;
625                 }
626                 in.close();
627                                 
628                 return 0;
629
630         }
631         catch(exception& e) {
632                 m->errorOut(e, "SplitAbundCommand", "readNamesFile");
633                 exit(1);
634         }
635 }
636 /**********************************************************************************************************************/
637 int SplitAbundCommand::createNameMap(ListVector* thisList) {
638         try {
639                 
640                 if (thisList != NULL) {
641                         for (int i = 0; i < thisList->getNumBins(); i++) {
642                                 if (m->control_pressed) { return 0; }
643                                 
644                                 string bin = thisList->get(i);
645                                                         
646                                 vector<string> names;
647                                 m->splitAtComma(bin, names);  //parses bin into individual sequence names
648                                 
649                                 for (int j = 0; j < names.size(); j++) {  nameMap[names[j]] = names[j];  }
650                         }//end for
651                 }
652                 
653                 return 0;
654         }
655         catch(exception& e) {
656                 m->errorOut(e, "SplitAbundCommand", "createNameMap");
657                 exit(1);
658         }
659 }
660 /**********************************************************************************************************************/
661 int SplitAbundCommand::writeNames() { //namefile
662         try {
663                 
664                 map<string, ofstream*> filehandles;
665
666                 if (Groups.size() == 0) {
667                         ofstream aout;
668                         ofstream rout;
669                         
670                         string rare = outputDir + m->getRootName(m->getSimpleName(namefile))  + "rare." + getOutputFileNameTag("name");
671                         m->openOutputFile(rare, rout);
672                         outputNames.push_back(rare); outputTypes["name"].push_back(rare);
673                         
674                         string abund = outputDir + m->getRootName(m->getSimpleName(namefile))  + "abund." + getOutputFileNameTag("name");
675                         m->openOutputFile(abund, aout);
676                         outputNames.push_back(abund); outputTypes["name"].push_back(abund);
677                         
678                         if (rareNames.size() != 0) {
679                                 for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
680                                         rout << (*itRare) << '\t' << nameMap[(*itRare)] << endl;
681                                 }
682                         }
683                         rout.close();
684                         
685                         if (abundNames.size() != 0) {
686                                 for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
687                                         aout << (*itAbund) << '\t' << nameMap[(*itAbund)] << endl;
688                                 }
689                         }
690                         aout.close();
691                         
692                 }else{ //parse names by abundance and group
693                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(namefile));
694                         ofstream* temp;
695                         ofstream* temp2;
696                         map<string, ofstream*> filehandles;
697                         map<string, ofstream*>::iterator it3;
698
699                         for (int i=0; i<Groups.size(); i++) {
700                                 temp = new ofstream;
701                                 filehandles[Groups[i]+".rare"] = temp;
702                                 temp2 = new ofstream;
703                                 filehandles[Groups[i]+".abund"] = temp2;
704                                 
705                 string rareGroupFileName = fileroot + Groups[i] + ".rare." + getOutputFileNameTag("name");
706                 string abundGroupFileName = fileroot + Groups[i] + ".abund." + getOutputFileNameTag("name");
707                                 m->openOutputFile(rareGroupFileName, *(filehandles[Groups[i]+".rare"]));
708                                 m->openOutputFile(abundGroupFileName, *(filehandles[Groups[i]+".abund"]));
709                         }
710                         
711                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
712                                 vector<string> names;
713                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
714                                 
715                                 string rareAbund;
716                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
717                                                 rareAbund = ".rare";
718                                 }else{ //you are a abund name
719                                                 rareAbund = ".abund";
720                                 }
721                                 
722                                 map<string, string> outputStrings;
723                                 map<string, string>::iterator itout;
724                                 for (int i = 0; i < names.size(); i++) {
725                                         
726                                         string group = groupMap->getGroup(names[i]);
727                                         
728                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
729                                                 itout = outputStrings.find(group+rareAbund);
730                                                 if (itout == outputStrings.end()) {  
731                                                         outputStrings[group+rareAbund] = names[i] + '\t' + names[i];
732                                                 }else {   outputStrings[group+rareAbund] += "," + names[i]; }
733                                         }else if(group == "not found") {
734                                                 m->mothurOut(names[i] + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
735                                         }
736                                 }
737                                 
738                                 for (itout = outputStrings.begin(); itout != outputStrings.end(); itout++) { *(filehandles[itout->first]) << itout->second << endl;     }
739                         }
740                         
741                         
742                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
743                                 (*(filehandles[it3->first])).close();
744                                 outputNames.push_back(fileroot + it3->first + "." + getOutputFileNameTag("name"));  outputTypes["name"].push_back(fileroot + it3->first + "." + getOutputFileNameTag("name"));
745                                 delete it3->second;
746                         }
747                 }
748                                 
749                 return 0;
750
751         }
752         catch(exception& e) {
753                 m->errorOut(e, "SplitAbundCommand", "writeNames");
754                 exit(1);
755         }
756 }
757 /**********************************************************************************************************************/
758 //just write the unique names - if a namesfile is given
759 int SplitAbundCommand::writeAccnos(string tag) { 
760         try {
761                 
762                 map<string, ofstream*> filehandles;
763                 
764                 if (Groups.size() == 0) {
765                         ofstream aout;
766                         ofstream rout;
767                         
768                         
769                         string rare = outputDir + m->getRootName(m->getSimpleName(inputFile))  + tag + "rare." + getOutputFileNameTag("accnos");
770                         m->openOutputFile(rare, rout);
771                         outputNames.push_back(rare); outputTypes["accnos"].push_back(rare); 
772                         
773                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
774                                 rout << (*itRare) << endl;
775                         }
776                         rout.close();
777                 
778                         string abund = outputDir + m->getRootName(m->getSimpleName(inputFile)) + tag  + "abund." + getOutputFileNameTag("accnos");
779                         m->openOutputFile(abund, aout);
780                         outputNames.push_back(abund); outputTypes["accnos"].push_back(abund);
781                         
782                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
783                                 aout << (*itAbund) << endl;
784                         }
785                         aout.close();
786                         
787                 }else{ //parse names by abundance and group
788                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(inputFile));
789                         ofstream* temp;
790                         ofstream* temp2;
791                         map<string, ofstream*> filehandles;
792                         map<string, ofstream*>::iterator it3;
793                         
794                         for (int i=0; i<Groups.size(); i++) {
795                                 temp = new ofstream;
796                                 filehandles[Groups[i]+".rare"] = temp;
797                                 temp2 = new ofstream;
798                                 filehandles[Groups[i]+".abund"] = temp2;
799                                 
800                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".rare"]));
801                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("accnos"), *(filehandles[Groups[i]+".abund"]));
802                         }
803                         
804                         //write rare
805                         for (set<string>::iterator itRare = rareNames.begin(); itRare != rareNames.end(); itRare++) {
806                                         string group = groupMap->getGroup(*itRare);
807                                         
808                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
809                                                 *(filehandles[group+".rare"]) << *itRare << endl;
810                                         }
811                         }
812                                 
813                         //write abund   
814                         for (set<string>::iterator itAbund = abundNames.begin(); itAbund != abundNames.end(); itAbund++) {
815                                         string group = groupMap->getGroup(*itAbund);
816                                         
817                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
818                                                 *(filehandles[group+".abund"]) << *itAbund << endl;
819                                         }
820                         }
821                         
822                         //close files
823                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
824                                 (*(filehandles[it3->first])).close();
825                                 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos"));  outputTypes["accnos"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("accnos"));
826                                 delete it3->second;
827                         }
828                 }
829                                 
830                 return 0;
831
832         }
833         catch(exception& e) {
834                 m->errorOut(e, "SplitAbundCommand", "writeAccnos");
835                 exit(1);
836         }
837 }
838 /**********************************************************************************************************************/
839 int SplitAbundCommand::parseGroup(string tag) { //namefile
840         try {
841                 
842                 map<string, ofstream*> filehandles;
843         
844                 if (Groups.size() == 0) {
845                         ofstream aout;
846                         ofstream rout;
847                         
848                         string rare = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "rare." + getOutputFileNameTag("group");
849                         m->openOutputFile(rare, rout);
850                         outputNames.push_back(rare); outputTypes["group"].push_back(rare);
851                 
852                         string abund = outputDir + m->getRootName(m->getSimpleName(groupfile))  + tag + "abund." + getOutputFileNameTag("group");
853 ;
854                         m->openOutputFile(abund, aout);
855                         outputNames.push_back(abund); outputTypes["group"].push_back(abund);
856                         
857                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
858                                 vector<string> names;
859                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
860                                 
861                                 for (int i = 0; i < names.size(); i++) {
862                                 
863                                         string group = groupMap->getGroup(names[i]);
864                                 
865                                         if (group == "not found") { 
866                                                 m->mothurOut(names[i] + " is not in your groupfile, ignoring, please correct."); m->mothurOutEndLine();
867                                         }else {
868                                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
869                                                         rout << names[i] << '\t' << group << endl;
870                                                 }else{ //you are a abund name
871                                                         aout << names[i] << '\t' << group << endl;
872                                                 }
873                                         }
874                                 }
875                         }
876                         
877                         rout.close(); 
878                         aout.close(); 
879
880                 }else{ //parse names by abundance and group
881                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(groupfile));
882                         ofstream* temp;
883                         ofstream* temp2;
884                         map<string, ofstream*> filehandles;
885                         map<string, ofstream*>::iterator it3;
886
887                         for (int i=0; i<Groups.size(); i++) {
888                                 temp = new ofstream;
889                                 filehandles[Groups[i]+".rare"] = temp;
890                                 temp2 = new ofstream;
891                                 filehandles[Groups[i]+".abund"] = temp2;
892                                 
893                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".rare"]));
894                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("group"), *(filehandles[Groups[i]+".abund"]));
895                         }
896                         
897                         for (map<string, string>::iterator itName = nameMap.begin(); itName != nameMap.end(); itName++) {                               
898                                 vector<string> names;
899                                 m->splitAtComma(itName->second, names);  //parses bin into individual sequence names
900                                 
901                                 string rareAbund;
902                                 if (rareNames.count(itName->first) != 0) { //you are a rare name
903                                         rareAbund = ".rare";
904                                 }else{ //you are a abund name
905                                         rareAbund = ".abund";
906                                 }
907                                 
908                                 for (int i = 0; i < names.size(); i++) {
909                                 
910                                         string group = groupMap->getGroup(names[i]);
911                                                                         
912                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
913                                                 *(filehandles[group+rareAbund]) << names[i] << '\t' << group << endl;
914                                         }
915                                 }
916                         }
917                         
918                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
919                                 (*(filehandles[it3->first])).close();
920                                 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group"));  outputTypes["group"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("group"));
921                                 delete it3->second;
922                         }
923                 }
924                                 
925                 return 0;
926
927         }
928         catch(exception& e) {
929                 m->errorOut(e, "SplitAbundCommand", "parseGroups");
930                 exit(1);
931         }
932 }
933 /**********************************************************************************************************************/
934 int SplitAbundCommand::parseFasta(string tag) { //namefile
935         try {
936                 
937                 map<string, ofstream*> filehandles;
938                 
939                 if (Groups.size() == 0) {
940                         ofstream aout;
941                         ofstream rout;
942                         
943                         string rare = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "rare." + getOutputFileNameTag("fasta");
944                         m->openOutputFile(rare, rout);
945                         outputNames.push_back(rare); outputTypes["fasta"].push_back(rare);
946                 
947                         string abund = outputDir + m->getRootName(m->getSimpleName(fastafile))  + tag + "abund." + getOutputFileNameTag("fasta");
948                         m->openOutputFile(abund, aout);
949                         outputNames.push_back(abund); outputTypes["fasta"].push_back(abund);
950                 
951                         //open input file
952                         ifstream in;
953                         m->openInputFile(fastafile, in);
954         
955                         while (!in.eof()) {
956                                 if (m->control_pressed) { break; }
957                 
958                                 Sequence seq(in); m->gobble(in);
959                                 
960                                 if (seq.getName() != "") { 
961                                         
962                                         map<string, string>::iterator itNames;
963                                         
964                                         itNames = nameMap.find(seq.getName());
965                                         
966                                         if (itNames == nameMap.end()) {
967                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
968                                         }else{
969                                                 if (rareNames.count(seq.getName()) != 0) { //you are a rare name
970                                                         seq.printSequence(rout);
971                                                 }else{ //you are a abund name
972                                                         seq.printSequence(aout);
973                                                 }
974                                         }
975                                 }
976                         }
977                         in.close();
978                         rout.close(); 
979                         aout.close(); 
980
981                 }else{ //parse names by abundance and group
982                         string fileroot =  outputDir + m->getRootName(m->getSimpleName(fastafile));
983                         ofstream* temp;
984                         ofstream* temp2;
985                         map<string, ofstream*> filehandles;
986                         map<string, ofstream*>::iterator it3;
987
988                         for (int i=0; i<Groups.size(); i++) {
989                                 temp = new ofstream;
990                                 filehandles[Groups[i]+".rare"] = temp;
991                                 temp2 = new ofstream;
992                                 filehandles[Groups[i]+".abund"] = temp2;
993                                 
994                                 m->openOutputFile(fileroot + tag + Groups[i] + ".rare." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".rare"]));
995                                 m->openOutputFile(fileroot + tag + Groups[i] + ".abund." + getOutputFileNameTag("fasta"), *(filehandles[Groups[i]+".abund"]));
996                         }
997                         
998                         //open input file
999                         ifstream in;
1000                         m->openInputFile(fastafile, in);
1001         
1002                         while (!in.eof()) {
1003                                 if (m->control_pressed) { break; }
1004                 
1005                                 Sequence seq(in); m->gobble(in);
1006                                 
1007                                 if (seq.getName() != "") { 
1008                                         map<string, string>::iterator itNames = nameMap.find(seq.getName());
1009                                         
1010                                         if (itNames == nameMap.end()) {
1011                                                 m->mothurOut(seq.getName() + " is not in your namesfile, ignoring."); m->mothurOutEndLine();
1012                                         }else{
1013                                                 vector<string> names;
1014                                                 m->splitAtComma(itNames->second, names);  //parses bin into individual sequence names
1015                                 
1016                                                 string rareAbund;
1017                                                 if (rareNames.count(itNames->first) != 0) { //you are a rare name
1018                                                         rareAbund = ".rare";
1019                                                 }else{ //you are a abund name
1020                                                         rareAbund = ".abund";
1021                                                 }
1022                                 
1023                                                 for (int i = 0; i < names.size(); i++) {
1024                                 
1025                                                         string group = groupMap->getGroup(seq.getName());
1026                                         
1027                                                         if (m->inUsersGroups(group, Groups)) { //only add if this is in a group we want
1028                                                                 seq.printSequence(*(filehandles[group+rareAbund]));
1029                                                         }else if(group == "not found") {
1030                                                                 m->mothurOut(seq.getName() + " is not in your groupfile. Ignoring."); m->mothurOutEndLine();
1031                                                         }
1032                                                 }
1033                                         }
1034                                 }
1035                         }
1036                         in.close();
1037                         
1038                         for (it3 = filehandles.begin(); it3 != filehandles.end(); it3++) { 
1039                                 (*(filehandles[it3->first])).close();
1040                                 outputNames.push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta"));  outputTypes["fasta"].push_back(fileroot + tag + it3->first + "." + getOutputFileNameTag("fasta"));
1041                                 delete it3->second;
1042                         }
1043                 }
1044                                 
1045                 return 0;
1046
1047         }
1048         catch(exception& e) {
1049                 m->errorOut(e, "SplitAbundCommand", "parseFasta");
1050                 exit(1);
1051         }
1052 }
1053 /**********************************************************************************************************************/
1054