]> git.donarmstrong.com Git - mothur.git/blob - subsamplecommand.cpp
parsimony calculator changed order of groups to reflect change in shared Utils made...
[mothur.git] / subsamplecommand.cpp
1 /*
2  *  subsamplecommand.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 10/27/10.
6  *  Copyright 2010 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "subsamplecommand.h"
11 #include "sharedutilities.h"
12
13 //**********************************************************************************************************************
14 vector<string> SubSampleCommand::getValidParameters(){  
15         try {
16                 string Array[] =  {"fasta", "group", "list","shared","rabund", "name","sabund","size","groups","label","outputdir","inputdir"};
17                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
18                 return myArray;
19         }
20         catch(exception& e) {
21                 m->errorOut(e, "SubSampleCommand", "getValidParameters");
22                 exit(1);
23         }
24 }
25 //**********************************************************************************************************************
26 SubSampleCommand::SubSampleCommand(){   
27         try {
28                 abort = true;
29                 //initialize outputTypes
30                 vector<string> tempOutNames;
31                 outputTypes["shared"] = tempOutNames;
32                 outputTypes["list"] = tempOutNames;
33                 outputTypes["rabund"] = tempOutNames;
34                 outputTypes["sabund"] = tempOutNames;
35                 outputTypes["fasta"] = tempOutNames;
36                 outputTypes["name"] = tempOutNames;
37                 outputTypes["group"] = tempOutNames;
38         }
39         catch(exception& e) {
40                 m->errorOut(e, "SubSampleCommand", "GetRelAbundCommand");
41                 exit(1);
42         }
43 }
44 //**********************************************************************************************************************
45 vector<string> SubSampleCommand::getRequiredParameters(){       
46         try {
47                 string Array[] =  {"fasta","list","shared","rabund", "sabund","or"};
48                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
49                 return myArray;
50         }
51         catch(exception& e) {
52                 m->errorOut(e, "SubSampleCommand", "getRequiredParameters");
53                 exit(1);
54         }
55 }
56 //**********************************************************************************************************************
57 vector<string> SubSampleCommand::getRequiredFiles(){    
58         try {
59                 vector<string> myArray;
60                 return myArray;
61         }
62         catch(exception& e) {
63                 m->errorOut(e, "SubSampleCommand", "getRequiredFiles");
64                 exit(1);
65         }
66 }
67 //**********************************************************************************************************************
68 SubSampleCommand::SubSampleCommand(string option) {
69         try {
70                 globaldata = GlobalData::getInstance();
71                 abort = false;
72                 allLines = 1;
73                 labels.clear();
74                 
75                 //allow user to run help
76                 if(option == "help") { help(); abort = true; }
77                 
78                 else {
79                         //valid paramters for this command
80                         string Array[] =  {"fasta", "group", "list","shared","rabund", "sabund","name","size","groups","label","outputdir","inputdir"};
81                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
82                         
83                         OptionParser parser(option);
84                         map<string,string> parameters = parser.getParameters();
85                         
86                         ValidParameters validParameter;
87                         
88                         //check to make sure all parameters are valid for command
89                         map<string,string>::iterator it;
90                         for (it = parameters.begin(); it != parameters.end(); it++) { 
91                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
92                         }
93                         
94                         //initialize outputTypes
95                         vector<string> tempOutNames;
96                         outputTypes["shared"] = tempOutNames;
97                         outputTypes["list"] = tempOutNames;
98                         outputTypes["rabund"] = tempOutNames;
99                         outputTypes["sabund"] = tempOutNames;
100                         outputTypes["fasta"] = tempOutNames;
101                         outputTypes["name"] = tempOutNames;
102                         outputTypes["group"] = tempOutNames;
103                                         
104                         //if the user changes the output directory command factory will send this info to us in the output parameter 
105                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = ""; }
106                         
107                         //if the user changes the input directory command factory will send this info to us in the output parameter 
108                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
109                         if (inputDir == "not found"){   inputDir = "";          }
110                         else {
111                                 string path;
112                                 it = parameters.find("list");
113                                 //user has given a template file
114                                 if(it != parameters.end()){ 
115                                         path = m->hasPath(it->second);
116                                         //if the user has not given a path then, add inputdir. else leave path alone.
117                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
118                                 }
119                                 
120                                 it = parameters.find("fasta");
121                                 //user has given a template file
122                                 if(it != parameters.end()){ 
123                                         path = m->hasPath(it->second);
124                                         //if the user has not given a path then, add inputdir. else leave path alone.
125                                         if (path == "") {       parameters["fasta"] = inputDir + it->second;            }
126                                 }
127                                 
128                                 it = parameters.find("shared");
129                                 //user has given a template file
130                                 if(it != parameters.end()){ 
131                                         path = m->hasPath(it->second);
132                                         //if the user has not given a path then, add inputdir. else leave path alone.
133                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
134                                 }
135                                 
136                                 it = parameters.find("group");
137                                 //user has given a template file
138                                 if(it != parameters.end()){ 
139                                         path = m->hasPath(it->second);
140                                         //if the user has not given a path then, add inputdir. else leave path alone.
141                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
142                                 }
143                                 
144                                 it = parameters.find("sabund");
145                                 //user has given a template file
146                                 if(it != parameters.end()){ 
147                                         path = m->hasPath(it->second);
148                                         //if the user has not given a path then, add inputdir. else leave path alone.
149                                         if (path == "") {       parameters["sabund"] = inputDir + it->second;           }
150                                 }
151                                 
152                                 it = parameters.find("rabund");
153                                 //user has given a template file
154                                 if(it != parameters.end()){ 
155                                         path = m->hasPath(it->second);
156                                         //if the user has not given a path then, add inputdir. else leave path alone.
157                                         if (path == "") {       parameters["rabund"] = inputDir + it->second;           }
158                                 }
159                                 
160                                 it = parameters.find("name");
161                                 //user has given a template file
162                                 if(it != parameters.end()){ 
163                                         path = m->hasPath(it->second);
164                                         //if the user has not given a path then, add inputdir. else leave path alone.
165                                         if (path == "") {       parameters["name"] = inputDir + it->second;             }
166                                 }
167                         }
168                         
169                         //check for required parameters
170                         listfile = validParameter.validFile(parameters, "list", true);
171                         if (listfile == "not open") { listfile = ""; abort = true; }
172                         else if (listfile == "not found") { listfile = ""; }    
173                         
174                         sabundfile = validParameter.validFile(parameters, "sabund", true);
175                         if (sabundfile == "not open") { sabundfile = ""; abort = true; }        
176                         else if (sabundfile == "not found") { sabundfile = ""; }
177                         
178                         rabundfile = validParameter.validFile(parameters, "rabund", true);
179                         if (rabundfile == "not open") { rabundfile = ""; abort = true; }        
180                         else if (rabundfile == "not found") { rabundfile = ""; }
181                         
182                         fastafile = validParameter.validFile(parameters, "fasta", true);
183                         if (fastafile == "not open") { fastafile = ""; abort = true; }  
184                         else if (fastafile == "not found") { fastafile = ""; }
185                         
186                         sharedfile = validParameter.validFile(parameters, "shared", true);
187                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
188                         else if (sharedfile == "not found") { sharedfile = ""; }
189                         
190                         namefile = validParameter.validFile(parameters, "name", true);
191                         if (namefile == "not open") { namefile = ""; abort = true; }    
192                         else if (namefile == "not found") { namefile = ""; }
193                         
194                         groupfile = validParameter.validFile(parameters, "group", true);
195                         if (groupfile == "not open") { groupfile = ""; abort = true; }  
196                         else if (groupfile == "not found") { groupfile = ""; }
197                         
198                         
199                         //check for optional parameter and set defaults
200                         // ...at some point should added some additional type checking...
201                         label = validParameter.validFile(parameters, "label", false);                   
202                         if (label == "not found") { label = ""; }
203                         else { 
204                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
205                                 else { allLines = 1;  }
206                         }
207                         
208                         groups = validParameter.validFile(parameters, "groups", false);                 
209                         if (groups == "not found") { groups = ""; pickedGroups = false; }
210                         else { 
211                                 pickedGroups = true;
212                                 m->splitAtDash(groups, Groups);
213                                 globaldata->Groups = Groups;
214                         }
215                         
216                         string temp = validParameter.validFile(parameters, "size", false);              if (temp == "not found"){       temp = "0";             }
217                         convert(temp, size);  
218                         
219                         if ((namefile != "") && (fastafile == "")) { m->mothurOut("You may only use a namefile with a fastafile."); m->mothurOutEndLine(); abort = true; }
220                         
221                         if ((fastafile == "") && (listfile == "") && (sabundfile == "") && (rabundfile == "") && (sharedfile == "")) {
222                                 m->mothurOut("You must provide a fasta, list, sabund, rabund or shared file as an input file."); m->mothurOutEndLine(); abort = true; }
223                         
224                         if (pickedGroups && ((groupfile == "") && (sharedfile == ""))) { 
225                                 m->mothurOut("You cannot pick groups without a valid group file or shared file."); m->mothurOutEndLine(); abort = true; }
226                         
227                         if ((groupfile != "") && ((fastafile == "") && (listfile == ""))) { 
228                                 m->mothurOut("Group file only valid with listfile or fastafile."); m->mothurOutEndLine(); abort = true; }
229                         
230                         if ((groupfile != "") && ((fastafile != "") && (listfile != ""))) { 
231                                 m->mothurOut("A new group file can only be made from the subsample of a listfile or fastafile, not both. Please correct."); m->mothurOutEndLine(); abort = true; }
232                         
233                 }
234
235         }
236         catch(exception& e) {
237                 m->errorOut(e, "SubSampleCommand", "SubSampleCommand");
238                 exit(1);
239         }
240 }
241
242 //**********************************************************************************************************************
243
244 void SubSampleCommand::help(){
245         try {
246                 m->mothurOut("The sub.sample command is designed to be used as a way to normalize your data, or create a smaller set from your original set.\n");
247                 m->mothurOut("The sub.sample command parameters are fasta, name, list, group, rabund, sabund, shared, groups, size and label.  You must provide a fasta, list, sabund, rabund or shared file as an input file.\n");
248                 m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n");
249                 m->mothurOut("The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n");
250                 m->mothurOut("The size parameter allows you indicate the size of your subsample.\n");
251                 m->mothurOut("The size parameter is not set: with shared file size=number of seqs in smallest sample, with all other files, 10% of number of seqs.\n");
252                 m->mothurOut("The sub.sample command should be in the following format: sub.sample(list=yourListFile, group=yourGroupFile, groups=yourGroups, label=yourLabels).\n");
253                 m->mothurOut("Example sub.sample(list=abrecovery.fn.list, group=abrecovery.groups, groups=B-C, size=20).\n");
254                 m->mothurOut("The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n");
255                 m->mothurOut("The sub.sample command outputs a .subsample file.\n");
256                 m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n");
257
258         }
259         catch(exception& e) {
260                 m->errorOut(e, "SubSampleCommand", "help");
261                 exit(1);
262         }
263 }
264
265 //**********************************************************************************************************************
266
267 SubSampleCommand::~SubSampleCommand(){}
268
269 //**********************************************************************************************************************
270
271 int SubSampleCommand::execute(){
272         try {
273         
274                 if (abort == true) { return 0; }
275                 
276                 if (sharedfile != "")   {   getSubSampleShared();       }
277                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); return 0; } }
278                 if (listfile != "")             {   getSubSampleList();         }
279                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); return 0; } }
280                 //if (rabund != "")             {   getSubSampleRabund();       }
281                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); return 0; } }
282                 //if (sabundfile != "") {   getSubSampleSabund();       }
283                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); return 0; } }
284                 //if (fastafile != "")  {   getSubSampleFasta();        }
285                 if (m->control_pressed) {  for (int i = 0; i < outputNames.size(); i++) {       remove(outputNames[i].c_str()); return 0; } }
286                         
287                                 
288                 m->mothurOutEndLine();
289                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
290                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
291                 m->mothurOutEndLine();
292                 
293                 return 0;
294         }
295         catch(exception& e) {
296                 m->errorOut(e, "SubSampleCommand", "execute");
297                 exit(1);
298         }
299 }
300 //**********************************************************************************************************************
301 int SubSampleCommand::getSubSampleShared() {
302         try {
303                 
304                 string thisOutputDir = outputDir;
305                 if (outputDir == "") {  thisOutputDir += m->hasPath(sharedfile);  }
306                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(sharedfile)) + "subsample" + m->getExtension(sharedfile);
307                 
308                 ofstream out;
309                 m->openOutputFile(outputFileName, out);
310                 outputTypes["shared"].push_back(outputFileName);  outputNames.push_back(outputFileName);
311                 
312                 InputData* input = new InputData(sharedfile, "sharedfile");
313                 vector<SharedRAbundVector*> lookup = input->getSharedRAbundVectors();
314                 string lastLabel = lookup[0]->getLabel();
315                 
316                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
317                 set<string> processedLabels;
318                 set<string> userLabels = labels;
319                 
320                 
321                 //as long as you are not at the end of the file or done wih the lines you want
322                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
323                         if (m->control_pressed) {  delete input; out.close(); return 0;  }
324                         
325                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
326                                 
327                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
328                                 
329                                 processShared(lookup, out);
330                                 
331                                 processedLabels.insert(lookup[0]->getLabel());
332                                 userLabels.erase(lookup[0]->getLabel());
333                         }
334                         
335                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
336                                 string saveLabel = lookup[0]->getLabel();
337                                 
338                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
339                                 
340                                 lookup = input->getSharedRAbundVectors(lastLabel);
341                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
342                                 
343                                 processShared(lookup, out);
344                                 
345                                 processedLabels.insert(lookup[0]->getLabel());
346                                 userLabels.erase(lookup[0]->getLabel());
347                                 
348                                 //restore real lastlabel to save below
349                                 lookup[0]->setLabel(saveLabel);
350                         }
351                         
352                         lastLabel = lookup[0]->getLabel();
353                         //prevent memory leak
354                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
355                         
356                         //get next line to process
357                         lookup = input->getSharedRAbundVectors();                               
358                 }
359                 
360                 
361                 if (m->control_pressed) {  out.close(); return 0;  }
362                 
363                 //output error messages about any remaining user labels
364                 set<string>::iterator it;
365                 bool needToRun = false;
366                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
367                         m->mothurOut("Your file does not include the label " + *it); 
368                         if (processedLabels.count(lastLabel) != 1) {
369                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
370                                 needToRun = true;
371                         }else {
372                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
373                         }
374                 }
375                 
376                 //run last label if you need to
377                 if (needToRun == true)  {
378                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
379                         lookup = input->getSharedRAbundVectors(lastLabel);
380                         
381                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
382                         
383                         processShared(lookup, out);
384                         
385                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
386                 }
387                 
388                 delete input;
389                 out.close();  
390                 
391                 return 0;
392                 
393         }
394         catch(exception& e) {
395                 m->errorOut(e, "SubSampleCommand", "getSubSampleShared");
396                 exit(1);
397         }
398 }
399 //**********************************************************************************************************************
400 int SubSampleCommand::processShared(vector<SharedRAbundVector*>& thislookup, ofstream& out) {
401         try {
402                 
403                 //if (pickedGroups) { eliminateZeroOTUS(thislookup); }
404                 
405                 if (size == 0) { //user has not set size, set size = smallest samples size
406                         size = thislookup[0]->getNumSeqs();
407                         for (int i = 1; i < thislookup.size(); i++) {
408                                 int thisSize = thislookup[i]->getNumSeqs();
409                                 
410                                 if (thisSize < size) {  size = thisSize;        }
411                         }
412                 }
413                 
414                 int numBins = thislookup[0]->getNumBins();
415                 for (int i = 0; i < thislookup.size(); i++) {           
416                         int thisSize = thislookup[i]->getNumSeqs();
417                         
418                         if (thisSize != size) {
419                                 
420                                 string thisgroup = thislookup[i]->getGroup();
421                                 
422                                 OrderVector* order = new OrderVector();
423                                 for(int p=0;p<numBins;p++){
424                                         for(int j=0;j<thislookup[i]->getAbundance(p);j++){
425                                                 order->push_back(p);
426                                         }
427                                 }
428                                 random_shuffle(order->begin(), order->end());
429                                 
430                                 SharedRAbundVector* temp = new SharedRAbundVector(numBins);
431                                 temp->setLabel(thislookup[i]->getLabel());
432                                 temp->setGroup(thislookup[i]->getGroup());
433                                 
434                                 delete thislookup[i];
435                                 thislookup[i] = temp;
436                                 
437                                 
438                                 for (int j = 0; j < size; j++) {
439                                         
440                                         if (m->control_pressed) { delete order; return 0; }
441                                         
442                                         //get random number to sample from order between 0 and thisSize-1.
443                                         int myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
444                                         
445                                         int bin = order->get(myrand);
446                                         
447                                         int abund = thislookup[i]->getAbundance(bin);
448                                         thislookup[i]->set(bin, (abund+1), thisgroup);
449                                 }       
450                                 delete order;
451                         }
452                 }
453                 
454                 //subsampling may have created some otus with no sequences in them
455                 eliminateZeroOTUS(thislookup);
456                 
457                 if (m->control_pressed) { return 0; }
458                 
459                 for (int i = 0; i < thislookup.size(); i++) {
460                         out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t';
461                         thislookup[i]->print(out);
462                 }
463                 
464                 return 0;
465                 
466         }
467         catch(exception& e) {
468                 m->errorOut(e, "SubSampleCommand", "processShared");
469                 exit(1);
470         }
471 }                       
472 //**********************************************************************************************************************
473 int SubSampleCommand::getSubSampleList() {
474         try {
475                 
476                 string thisOutputDir = outputDir;
477                 if (outputDir == "") {  thisOutputDir += m->hasPath(listfile);  }
478                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(listfile)) + "subsample" + m->getExtension(listfile);
479                 
480                 ofstream out;
481                 m->openOutputFile(outputFileName, out);
482                 outputTypes["list"].push_back(outputFileName);  outputNames.push_back(outputFileName);
483                 
484                 InputData* input;
485                 //if you have a groupfile you want to read a shared list
486                 if (groupfile != "") {
487                         
488                         GroupMap* groupMap = new GroupMap(groupfile);
489                         groupMap->readMap();
490                         
491                         //takes care of user setting groupNames that are invalid or setting groups=all
492                         SharedUtil* util = new SharedUtil();
493                         util->setGroups(Groups, groupMap->namesOfGroups);
494                         delete util;
495                         
496                         //create outputfiles
497                         string groupOutputDir = outputDir;
498                         if (outputDir == "") {  groupOutputDir += m->hasPath(groupfile);  }
499                         string groupOutputFileName = groupOutputDir + m->getRootName(m->getSimpleName(groupfile)) + "subsample" + m->getExtension(groupfile);
500                         
501                         ofstream outGroup;
502                         m->openOutputFile(groupOutputFileName, outGroup);
503                         outputTypes["group"].push_back(groupOutputFileName);  outputNames.push_back(groupOutputFileName);
504                         
505                         globaldata->setGroupFile(groupfile); //shared list needs this
506                         
507                         input = new InputData(listfile, "list");
508                         ListVector* list = input->getListVector();
509                         string lastLabel = list->getLabel();
510                         
511                         //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
512                         set<string> processedLabels;
513                         set<string> userLabels = labels;
514                         
515                         //file mismatch quit
516                         if (list->getNumSeqs() != groupMap->getNumSeqs()) { 
517                                 m->mothurOut("[ERROR]: your list file contains " + toString(list->getNumSeqs()) + " sequences, and your groupfile contains " + toString(groupMap->getNumSeqs()) + ", please correct."); 
518                                 m->mothurOutEndLine();
519                                 delete groupMap;
520                                 delete list;
521                                 delete input;
522                                 out.close();
523                                 outGroup.close();
524                                 return 0;
525                         }
526                         
527                         //as long as you are not at the end of the file or done wih the lines you want
528                         while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
529                                 
530                                 if (m->control_pressed) {  delete list; delete input; delete groupMap; out.close(); outGroup.close(); return 0;  }
531                                 
532                                 if(allLines == 1 || labels.count(list->getLabel()) == 1){                       
533                                         
534                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
535                                         
536                                         processListGroup(list, groupMap, out, outGroup);
537                                         
538                                         processedLabels.insert(list->getLabel());
539                                         userLabels.erase(list->getLabel());
540                                 }
541                                 
542                                 if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
543                                         string saveLabel = list->getLabel();
544                                         
545                                         delete list; 
546                                         
547                                         list = input->getListVector(lastLabel);
548                                         m->mothurOut(list->getLabel()); m->mothurOutEndLine();
549                                         
550                                         processListGroup(list, groupMap, out, outGroup);
551                                         
552                                         processedLabels.insert(list->getLabel());
553                                         userLabels.erase(list->getLabel());
554                                         
555                                         //restore real lastlabel to save below
556                                         list->setLabel(saveLabel);
557                                 }
558                                 
559                                 lastLabel = list->getLabel();
560                                 
561                                 delete list; list = NULL;
562                                 
563                                 //get next line to process
564                                 list = input->getListVector();                          
565                         }
566                         
567                         
568                         if (m->control_pressed) {  if (list != NULL) { delete list; } delete input; delete groupMap; out.close(); outGroup.close(); return 0;  }
569                         
570                         //output error messages about any remaining user labels
571                         set<string>::iterator it;
572                         bool needToRun = false;
573                         for (it = userLabels.begin(); it != userLabels.end(); it++) {  
574                                 m->mothurOut("Your file does not include the label " + *it); 
575                                 if (processedLabels.count(lastLabel) != 1) {
576                                         m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
577                                         needToRun = true;
578                                 }else {
579                                         m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
580                                 }
581                         }
582                         
583                         //run last label if you need to
584                         if (needToRun == true)  {
585                                 if (list != NULL) { delete list; }
586                                 
587                                 list = input->getListVector(lastLabel);
588                                 
589                                 m->mothurOut(list->getLabel()); m->mothurOutEndLine();
590                                 
591                                 processListGroup(list, groupMap, out, outGroup);
592                                 
593                                 delete list; list = NULL;
594                         }
595                         
596                         out.close();  outGroup.close();
597                         if (list != NULL) { delete list; }
598                         delete groupMap;
599                         
600                 }else {
601                         //need to complete
602                 }
603                 
604                 
605                 delete input;
606                                                 
607                 return 0;
608  
609         }
610         catch(exception& e) {
611                 m->errorOut(e, "SubSampleCommand", "getSubSampleList");
612                 exit(1);
613         }
614 }
615 //**********************************************************************************************************************
616 int SubSampleCommand::processListGroup(ListVector*& list, GroupMap*& groupMap, ofstream& out, ofstream& outGroup) {
617         try {
618                                 
619                 if (size == 0) { //user has not set size, set size = 10% samples size
620                         size = int (list->getNumSeqs() * 0.10);
621                 }
622                 
623                 int numBins = list->getNumBins();
624                 int thisSize = list->getNumSeqs();
625                 
626                 if (size > thisSize) { m->mothurOut("Your list file only contains " + toString(thisSize) + " sequences. Setting size to " + toString(thisSize) + "."); m->mothurOutEndLine();
627                         size = thisSize;
628                 }
629                 
630                 vector<nameToBin> seqs;
631                 for (int i = 0; i < numBins; i++) {
632                         string names = list->get(i);
633                         
634                         //parse names
635                         string individual = "";
636                         int length = names.length();
637                         for(int j=0;j<length;j++){
638                                 if(names[j] == ','){
639                                         nameToBin temp(individual, i);
640                                         seqs.push_back(temp);
641                                         individual = "";                                
642                                 }
643                                 else{
644                                         individual += names[j];
645                                 }
646                         }
647                         nameToBin temp(individual, i);
648                         seqs.push_back(temp);
649                 }
650                 
651                                         
652                 ListVector* temp = new ListVector(numBins);
653                 temp->setLabel(list->getLabel());
654                 
655                 delete list; 
656                 list = temp;
657                 
658                 set<int> alreadySelected; //dont want repeat sequence names added
659                 alreadySelected.insert(-1);
660                 for (int j = 0; j < size; j++) {
661                         
662                         if (m->control_pressed) { return 0; }
663                         
664                         //get random sequence to add, making sure we have not already added it
665                         int myrand = -1;
666                         while (alreadySelected.count(myrand) != 0) {
667                                 myrand = (int)((float)(rand()) / (RAND_MAX / (thisSize-1) + 1));
668                         }
669                         alreadySelected.insert(myrand);
670                         
671                         //update new list
672                         string oldNames = temp->get(seqs[myrand].bin);
673                         if (oldNames == "") { oldNames += seqs[myrand].name; }
674                         else { oldNames += "," + seqs[myrand].name; }
675                         
676                         temp->set(seqs[myrand].bin, oldNames);
677                         
678                         //update group file
679                         string group = groupMap->getGroup(seqs[myrand].name);
680                         if (group == "not found") { m->mothurOut("[ERROR]: " + seqs[myrand].name + " is not in your groupfile. please correct."); m->mothurOutEndLine(); group = "NOTFOUND"; }
681                         
682                         outGroup << seqs[myrand].name << '\t' << group << endl;
683                 }       
684
685                 if (m->control_pressed) { return 0; }
686                 
687                 list->print(out);
688                 
689                 return 0;
690                 
691         }
692         catch(exception& e) {
693                 m->errorOut(e, "SubSampleCommand", "processListGroup");
694                 exit(1);
695         }
696 }
697 //**********************************************************************************************************************
698 int SubSampleCommand::eliminateZeroOTUS(vector<SharedRAbundVector*>& thislookup) {
699         try {
700                 
701                 vector<SharedRAbundVector*> newLookup;
702                 for (int i = 0; i < thislookup.size(); i++) {
703                         SharedRAbundVector* temp = new SharedRAbundVector();
704                         temp->setLabel(thislookup[i]->getLabel());
705                         temp->setGroup(thislookup[i]->getGroup());
706                         newLookup.push_back(temp);
707                 }
708                 
709                 //for each bin
710                 for (int i = 0; i < thislookup[0]->getNumBins(); i++) {
711                         if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) {  delete newLookup[j];  } return 0; }
712                         
713                         //look at each sharedRabund and make sure they are not all zero
714                         bool allZero = true;
715                         for (int j = 0; j < thislookup.size(); j++) {
716                                 if (thislookup[j]->getAbundance(i) != 0) { allZero = false;  break;  }
717                         }
718                         
719                         //if they are not all zero add this bin
720                         if (!allZero) {
721                                 for (int j = 0; j < thislookup.size(); j++) {
722                                         newLookup[j]->push_back(thislookup[j]->getAbundance(i), thislookup[j]->getGroup());
723                                 }
724                         }
725                 }
726                 
727                 for (int j = 0; j < thislookup.size(); j++) {  delete thislookup[j];  }
728                 thislookup.clear();
729                 
730                 thislookup = newLookup;
731                 
732                 return 0;
733                 
734         }
735         catch(exception& e) {
736                 m->errorOut(e, "SubSampleCommand", "eliminateZeroOTUS");
737                 exit(1);
738         }
739 }
740
741 //**********************************************************************************************************************
742
743
744