]> git.donarmstrong.com Git - mothur.git/blob - splitabundcommand.cpp
added split.abund comand
[mothur.git] / splitabundcommand.cpp
1 /*
2  *  splitabundcommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 5/17/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "splitabundcommand.h"
11
12 //**********************************************************************************************************************
13 SplitAbundCommand::SplitAbundCommand(string option)  {
14         try {
15                 abort = false;
16                 wroteRareList = false;
17                 wroteAbundList = false;
18                 allLines = 1;
19                         
20                 //allow user to run help
21                 if(option == "help") { help(); abort = true; }
22                 
23                 else {
24                         //valid paramters for this command
25                         string Array[] =  {"list","name","group","label","accnos","cutoff","outputdir","inputdir"};
26                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
27                         
28                         OptionParser parser(option);
29                         map<string, string> parameters = parser.getParameters();
30                         
31                         ValidParameters validParameter;
32                         map<string, string>::iterator it;
33                 
34                         //check to make sure all parameters are valid for command
35                         for (it = parameters.begin(); it != parameters.end(); it++) { 
36                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
37                         }
38                                                 
39                         //if the user changes the input directory command factory will send this info to us in the output parameter 
40                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
41                         if (inputDir == "not found"){   inputDir = "";          }
42                         else {
43                                 string path;
44                                 it = parameters.find("list");
45                                 //user has given a template file
46                                 if(it != parameters.end()){ 
47                                         path = hasPath(it->second);
48                                         //if the user has not given a path then, add inputdir. else leave path alone.
49                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
50                                 }
51                                 
52                                 it = parameters.find("group");
53                                 //user has given a template file
54                                 if(it != parameters.end()){ 
55                                         path = hasPath(it->second);
56                                         //if the user has not given a path then, add inputdir. else leave path alone.
57                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
58                                 }
59                                 
60                                 it = parameters.find("name");
61                                 //user has given a template file
62                                 if(it != parameters.end()){ 
63                                         path = hasPath(it->second);
64                                         //if the user has not given a path then, add inputdir. else leave path alone.
65                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
66                                 }
67
68                         }
69
70                         
71                         //if the user changes the output directory command factory will send this info to us in the output parameter 
72                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
73
74                         //check for required parameters
75                         listfile = validParameter.validFile(parameters, "list", true);
76                         if (listfile == "not open") { abort = true; }
77                         else if (listfile == "not found") { listfile = ""; }    
78                         
79                         //check for required parameters
80                         namefile = validParameter.validFile(parameters, "name", true);
81                         if (namefile == "not open") { abort = true; }
82                         else if (namefile == "not found") { namefile = ""; }    
83                         
84                         groupfile = validParameter.validFile(parameters, "group", true);
85                         if (groupfile == "not open") { abort = true; }  
86                         else if (groupfile == "not found") { groupfile = ""; }
87                         else {  
88                                 groupMap = new GroupMap(groupfile);
89                                 
90                                 int error = groupMap->readMap();
91                                 if (error == 1) { abort = true; }
92                         }
93                         
94                         //do you have all files needed
95                         if ((listfile == "") && (namefile == "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
96                         if ((listfile != "") && (namefile != "")) { m->mothurOut("You must either a listfile or a namefile for the split.abund command, but NOT BOTH. "); m->mothurOutEndLine(); abort = true;  }
97                         
98                         //check for optional parameter and set defaults
99                         // ...at some point should added some additional type checking...
100                         label = validParameter.validFile(parameters, "label", false);                   
101                         if (label == "not found") { label = "";  allLines = 1; }
102                         else { 
103                                 if(label != "all") {  splitAtDash(label, labels);  allLines = 0;  }
104                                 else { allLines = 1;  }
105                         }
106                         
107                         string temp = validParameter.validFile(parameters, "accnos", false);            if (temp == "not found") { temp = "F"; }
108                         accnos = isTrue(temp); 
109                         
110                         temp = validParameter.validFile(parameters, "cutoff", false);                           if (temp == "not found") { temp = "0"; }
111                         convert(temp, cutoff); 
112
113                         if (cutoff == 0) {  m->mothurOut("You must provide a cutoff to qualify what is abundant for the split.abund command. "); m->mothurOutEndLine(); abort = true;  }
114
115                 }
116
117         }
118         catch(exception& e) {
119                 m->errorOut(e, "SplitAbundCommand", "SplitAbundCommand");
120                 exit(1);
121         }
122 }
123 //**********************************************************************************************************************
124 void SplitAbundCommand::help(){
125         try {
126                 m->mothurOut("The split.abund command reads a list or a names file splits the sequences into rare and abundant groups.. \n");
127                 m->mothurOut("The split.abund command parameters are list, name, cutoff, group, label and accnos.\n");
128                 m->mothurOut("The list or name parameter is required, and you must provide a cutoff value.\n");
129                 m->mothurOut("The cutoff parameter is used to qualify what is abundant and rare.\n");
130                 m->mothurOut("The group parameter allows you to parse a group file into rare and abundant groups.\n");
131                 m->mothurOut("The label parameter is used to read specific labels in your listfile you want to use.\n");
132                 m->mothurOut("The accnos parameter allows you to output a .rare.accnos and .abund.accnos files to use with the get.seqs and remove.seqs commands.\n");
133                 m->mothurOut("The split.abund command should be used in the following format: split.abund(list=yourListFile, group=yourGroupFile, label=yourLabels, cutoff=yourCutoff).\n");
134                 m->mothurOut("Example: split.abundt(list=abrecovery.fn.list, group=abrecovery.groups, label=0.03, cutoff=2).\n");
135                 m->mothurOut("Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile).\n\n");
136
137         }
138         catch(exception& e) {
139                 m->errorOut(e, "SplitAbundCommand", "help");
140                 exit(1);
141         }
142 }
143 //**********************************************************************************************************************
144 SplitAbundCommand::~SplitAbundCommand(){ 
145         if (groupfile != "") {  delete groupMap;  } 
146 }
147 //**********************************************************************************************************************
148 int SplitAbundCommand::execute(){
149         try {
150         
151                 if (abort == true) {    return 0;       }
152                 
153                 if (namefile != "") {  split();  }
154                 else {
155                 
156                         //remove old files so you can append later....
157                         string fileroot = outputDir + getRootName(getSimpleName(listfile));
158                         remove((fileroot + "rare.list").c_str());
159                         remove((fileroot + "abund.list").c_str());
160                         
161                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
162                         set<string> processedLabels;
163                         set<string> userLabels = labels;        
164                         
165                         input = new InputData(listfile, "list");
166                         list = input->getListVector();
167                         string lastLabel = list->getLabel();
168                         
169                         if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
170                         
171                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
172                         
173                                 if (m->control_pressed) { delete input; delete list; for (int i = 0; i < outputNames.size(); i++) {     remove(outputNames[i].c_str()); } return 0; }
174                                 
175                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){
176                                                 
177                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
178                                                 split(list);
179                                                                                         
180                                                 processedLabels.insert(list->getLabel());
181                                                 userLabels.erase(list->getLabel());
182                                 }
183                                 
184                                 if ((anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
185                                                 string saveLabel = list->getLabel();
186                                                 
187                                                 delete list;
188                                                 list = input->getListVector(lastLabel); //get new list vector to process
189                                                 
190                                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
191                                                 split(list);
192                                                 
193                                                 processedLabels.insert(list->getLabel());
194                                                 userLabels.erase(list->getLabel());
195                                                 
196                                                 //restore real lastlabel to save below
197                                                 list->setLabel(saveLabel);
198                                 }
199                                 
200                         
201                                 lastLabel = list->getLabel();
202                                         
203                                 delete list;
204                                 list = input->getListVector(); //get new list vector to process
205                         }
206                         
207                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
208                         
209                         //output error messages about any remaining user labels
210                         set<string>::iterator it;
211                         bool needToRun = false;
212                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
213                                 m->mothurOut("Your file does not include the label " + *it); 
214                                 if (processedLabels.count(lastLabel) != 1) {
215                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
216                                         needToRun = true;
217                                 }else {
218                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
219                                 }
220
221                         }
222                         
223                         if (m->control_pressed) { delete input;  for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; }
224                         
225                         //run last label if you need to
226                         if (needToRun == true)  {
227                                 if (list != NULL) {     delete list;    }
228                                 list = input->getListVector(lastLabel); //get new list vector to process
229                                 
230                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
231                                 split(list);            
232                                 
233                                 delete list;
234                         }
235                         
236                         delete input;
237                         
238                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str()); }       return 0;       }
239                         
240                         if (wroteAbundList) {  outputNames.push_back(fileroot + "abund.list");          }
241                         if (wroteRareList)      {  outputNames.push_back(fileroot + "rare.list");               }
242                 }
243                 
244                 
245                 m->mothurOutEndLine();
246                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
247                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
248                 m->mothurOutEndLine();
249                 
250                 return 0;
251         }
252         catch(exception& e) {
253                 m->errorOut(e, "SplitAbundCommand", "execute");
254                 exit(1);
255         }
256 }
257 /**********************************************************************************************************************/
258 int SplitAbundCommand::split(ListVector* thisList) {
259         try {
260         
261                 SAbundVector* sabund = new SAbundVector();
262                 *sabund = thisList->getSAbundVector();
263                 
264                 //find out how many bins are rare and how many are abundant so you can process the list vector one bin at a time
265                 // and don't have to store the bins until you are done with the whole vector, this save alot of space.
266                 int numRareBins = 0;
267                 for (int i = 0; i <= sabund->getMaxRank(); i++) {
268                         if (i > cutoff) { break; }
269                         numRareBins += sabund->get(i);
270                 }
271                 int numAbundBins = thisList->getNumBins() - numRareBins;
272                 delete sabund;
273                 
274                 //setup output files
275                 ofstream outListAbund;
276                 ofstream outListRare;
277                 ofstream outGroupRare;
278                 ofstream outGroupAbund;
279                 ofstream outAccnosRare;
280                 ofstream outAccnosAbund;
281                 
282                 string fileroot = outputDir + getRootName(getSimpleName(listfile));
283                 if (numRareBins > 0) {
284                         wroteRareList = true;
285                         string listRareName = fileroot + "rare.list";
286                         openOutputFileAppend(listRareName, outListRare);
287                         outListRare << thisList->getLabel() << '\t' << numRareBins << '\t';
288                         
289                         if (accnos) {
290                                 string accnosName = fileroot + thisList->getLabel() + ".rare.accnos";
291                                 openOutputFile(accnosName, outAccnosRare);
292                                 outputNames.push_back(accnosName);
293                         }
294                         
295                         if (groupfile != "") {
296                                 string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group";
297                                 openOutputFile(groupFileName, outGroupRare);
298                                 outputNames.push_back(groupFileName);
299                         }
300                 }
301                 
302                 if (numAbundBins > 0) {
303                         wroteAbundList = true;
304                         string listAbundName = fileroot + "abund.list";
305                         openOutputFileAppend(listAbundName, outListAbund);
306                         outListAbund << thisList->getLabel() << '\t' << numAbundBins << '\t';
307                         
308                         if (accnos) {
309                                 string accnosName = fileroot + thisList->getLabel() + ".abund.accnos";
310                                 openOutputFile(accnosName, outAccnosAbund);
311                                 outputNames.push_back(accnosName);
312                         }
313                         
314                         if (groupfile != "") {
315                                 string groupFileName = outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".abund.group";
316                                 openOutputFile(groupFileName, outGroupAbund);
317                                 outputNames.push_back(groupFileName);
318                         }
319                 }
320                 
321                 for (int i = 0; i < thisList->getNumBins(); i++) {
322                         if (m->control_pressed) { break; }
323                         
324                         string bin = list->get(i); 
325                         
326                         int size = getNumNames(bin);
327                         
328                         if (size <= cutoff) {  outListRare << bin << '\t';  }
329                         else                            {  outListAbund << bin << '\t'; }
330                         
331                         if ((groupfile != "") || (accnos)) { //you need to parse the bin...
332                                 vector<string> names;
333                                 splitAtComma(bin, names);  //parses bin into individual sequence names
334                                 
335                                 //parse bin into list of sequences in each group
336                                 for (int j = 0; j < names.size(); j++) {
337                                         
338                                         //write to accnos file
339                                         if (accnos) {
340                                                 if (size <= cutoff) {  outAccnosRare << names[j] << endl;  }
341                                                 else                            {  outAccnosAbund << names[j] << endl; }
342                                         }
343                                         
344                                         //write to groupfile
345                                         if (groupfile != "") {
346                                                 string group = groupMap->getGroup(names[j]);
347                                         
348                                                 if (group == "not found") {  //error in groupfile so close and remove output file and disregard groupfile
349                                                         m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine(); 
350                                                         delete groupMap; 
351                                                         if (numAbundBins > 0) { 
352                                                                 outGroupAbund.close();
353                                                                 remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".abund.group").c_str()); 
354                                                         }
355                                                         if (numRareBins > 0) { 
356                                                                 outGroupRare.close();
357                                                                 remove((outputDir + getRootName(getSimpleName(groupfile)) + thisList->getLabel() + ".rare.group").c_str());  
358                                                         }
359                                                         groupfile = "";
360                                                 }else {
361                                                         if (size <= cutoff) {  outGroupRare << names[j] << '\t' << group << endl;  }
362                                                         else                            {  outGroupAbund << names[j] << '\t' << group << endl; }
363                                                 }
364                                         }
365                                         
366                                 }//end for names
367                         }//end if parse
368                 }//end for list
369                 
370                 
371                 //close files
372                 if (numRareBins > 0) {  
373                         outListRare << endl;
374                         outListRare.close();
375                         if (accnos) {   outAccnosRare.close();  }
376                         if (groupfile != "") {  outGroupRare.close();   }
377                 }
378                 
379                 if (numAbundBins > 0) { 
380                         outListAbund << endl;
381                         outListAbund.close();
382                         if (accnos) {   outAccnosAbund.close(); }
383                         if (groupfile != "") {  outGroupAbund.close();  }
384                 }
385
386                 return 0;
387
388         }
389         catch(exception& e) {
390                 m->errorOut(e, "SplitAbundCommand", "split");
391                 exit(1);
392         }
393 }
394 /**********************************************************************************************************************/
395 int SplitAbundCommand::split() { //namefile
396         try {
397                 //setup output files
398                 ofstream outNameAbund;
399                 ofstream outNameRare;
400                 ofstream outGroupRare;
401                 ofstream outGroupAbund;
402                 ofstream outAccnosRare;
403                 ofstream outAccnosAbund;
404                 
405                 bool wroteNameAbund = false;
406                 bool wroteNameRare = false;
407                 bool wroteGroupRare = false;
408                 bool wroteGroupAbund = false;
409                 bool wroteAccnosRare = false;
410                 bool wroteAccnosAbund = false;
411                 
412                 //prepare output files
413                 string fileroot = outputDir + getRootName(getSimpleName(namefile));
414                         
415                 string nameRareName = fileroot + "rare.names";
416                 openOutputFile(nameRareName, outNameRare);
417                 string nameAbundName = fileroot + "abund.names";
418                 openOutputFile(nameAbundName, outNameAbund);
419                         
420                 if (accnos) {
421                         string accnosName = fileroot + "rare.accnos";
422                         openOutputFile(accnosName, outAccnosRare);
423                         
424                         accnosName = fileroot + "abund.accnos";
425                         openOutputFile(accnosName, outAccnosAbund);
426                 }
427                         
428                 if (groupfile != "") {
429                         string groupFileName = outputDir + getRootName(getSimpleName(groupfile))  + ".rare.group";
430                         openOutputFile(groupFileName, outGroupRare);
431                         
432                         groupFileName = outputDir + getRootName(getSimpleName(groupfile))  + ".abund.group";
433                         openOutputFile(groupFileName, outGroupAbund);
434                 }
435                 
436                 
437                 //open input file
438                 ifstream in;
439                 openInputFile(namefile, in);
440                 
441                 while (!in.eof()) {
442                         if (m->control_pressed) { break; }
443                         
444                         string firstCol, secondCol;
445                         in >> firstCol >> secondCol; gobble(in);
446                         
447                         int size = getNumNames(secondCol);
448                                 
449                         if (size <= cutoff) {  outNameRare << firstCol << '\t' << secondCol << endl;  wroteNameRare = true;  }
450                         else                            {  outNameAbund << firstCol << '\t' << secondCol << endl; wroteNameAbund = true; }
451
452                         
453                         if ((groupfile != "") || (accnos)) { //you need to parse the bin...
454                                 vector<string> names;
455                                 splitAtComma(secondCol, names);  //parses bin into individual sequence names
456                                 
457                                 //parse bin into list of sequences in each group
458                                 for (int j = 0; j < names.size(); j++) {
459                                         
460                                         //write to accnos file
461                                         if (accnos) {
462                                                 if (size <= cutoff) {  outAccnosRare << names[j] << endl;  wroteAccnosRare = true; }
463                                                 else                            {  outAccnosAbund << names[j] << endl; wroteAccnosAbund = true; }
464                                         }
465                                         
466                                         //write to groupfile
467                                         if (groupfile != "") {
468                                                 string group = groupMap->getGroup(names[j]);
469                                         
470                                                 if (group == "not found") {  //error in groupfile so close and remove output file and disregard groupfile
471                                                         m->mothurOut(names[j] + " is not in your groupfile. disregarding groupfile."); m->mothurOutEndLine(); 
472                                                         delete groupMap; 
473                                                 
474                                                         outGroupAbund.close();
475                                                         remove((outputDir + getRootName(getSimpleName(groupfile))  + ".abund.group").c_str()); 
476                                                         outGroupRare.close();
477                                                         remove((outputDir + getRootName(getSimpleName(groupfile)) + ".rare.group").c_str());  
478                                                         
479                                                         groupfile = "";
480                                                         wroteGroupRare = false;
481                                                         wroteGroupAbund = false;
482                                                 }else {
483                                                         if (size <= cutoff) {  outGroupRare << names[j] << '\t' << group << endl;  wroteGroupRare = true; }
484                                                         else                            {  outGroupAbund << names[j] << '\t' << group << endl; wroteGroupAbund = true; }
485                                                 }
486                                         }
487                                         
488                                 }//end for names
489                         }//end if parse
490                 }//end while
491                 
492                 
493                 //close files
494                 in.close();
495                 outNameRare.close();
496                 outNameAbund.close();
497                 if (!wroteNameRare) { remove((fileroot + "rare.names").c_str());  }
498                 else { outputNames.push_back((fileroot + "rare.names"));  }
499                 if (!wroteNameAbund) { remove((fileroot + "abund.names").c_str());  }
500                 else { outputNames.push_back((fileroot + "abund.names"));  }
501                 
502                 if (groupfile != "") {  
503                         outGroupRare.close();    outGroupAbund.close();
504                         if (!wroteGroupRare) { remove((outputDir + getRootName(getSimpleName(groupfile))  + ".rare.group").c_str());  }
505                         else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile))  + ".rare.group"));  }
506                         if (!wroteGroupAbund) { remove((outputDir + getRootName(getSimpleName(groupfile))  + ".abund.group").c_str());  }
507                         else { outputNames.push_back((outputDir + getRootName(getSimpleName(groupfile))  + ".abund.group"));  }
508                 }
509         
510                 if (accnos) {   
511                         outAccnosAbund.close(); outAccnosRare.close();
512                         if (!wroteAccnosRare) { remove((fileroot + "rare.accnos").c_str());  }
513                         else { outputNames.push_back((fileroot + "rare.accnos"));  }
514                         if (!wroteAccnosAbund) { remove((fileroot + "abund.accnos").c_str());  }
515                         else { outputNames.push_back((fileroot + "abund.accnos"));  }
516                 }
517                                                 
518                 return 0;
519
520         }
521         catch(exception& e) {
522                 m->errorOut(e, "SplitAbundCommand", "split");
523                 exit(1);
524         }
525 }
526
527 /**********************************************************************************************************************/
528
529