]> git.donarmstrong.com Git - mothur.git/blob - createdatabasecommand.cpp
Merge remote-tracking branch 'mothur/master'
[mothur.git] / createdatabasecommand.cpp
1 //
2 //  createdatabasecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/28/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "createdatabasecommand.h"
10 #include "inputdata.h"
11
12 //**********************************************************************************************************************
13 vector<string> CreateDatabaseCommand::setParameters(){  
14         try {
15                 CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pfasta);
16                 CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pname);
17                 CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pcontaxonomy);
18                 CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(plist);
19         CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none",false,false); parameters.push_back(pshared);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pgroup);
21                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "CreateDatabaseCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string CreateDatabaseCommand::getHelpString(){  
36         try {
37                 string helpString = "";
38                 helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
39                 helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
40         helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
41         helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
42         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
43         helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
44         helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
45         helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
46         helpString += "The create.database command should be in the following format: \n";
47                 helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";       
48                 helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
49                 helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";       
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string CreateDatabaseCommand::getOutputFileNameTag(string type, string inputName=""){   
59         try {
60         string outputFileName = "";
61                 map<string, vector<string> >::iterator it;
62         
63         //is this a type this command creates
64         it = outputTypes.find(type);
65         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
66         else {
67             if (type == "database") {  outputFileName =  "database"; }
68             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
69         }
70         return outputFileName;
71         }
72         catch(exception& e) {
73                 m->errorOut(e, "CreateDatabaseCommand", "getOutputFileNameTag");
74                 exit(1);
75         }
76 }
77
78 //**********************************************************************************************************************
79 CreateDatabaseCommand::CreateDatabaseCommand(){ 
80         try {
81                 abort = true; calledHelp = true; 
82                 setParameters();
83                 vector<string> tempOutNames;
84                 outputTypes["database"] = tempOutNames;
85         }
86         catch(exception& e) {
87                 m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
88                 exit(1);
89         }
90 }
91
92 //**********************************************************************************************************************
93 CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
94         try{
95                 abort = false; calledHelp = false;   
96         
97                 //allow user to run help
98                 if (option == "help") { 
99                         help(); abort = true; calledHelp = true;
100                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
101                 else {
102                         vector<string> myArray = setParameters();
103                         
104                         OptionParser parser(option);
105                         map<string, string> parameters = parser.getParameters();
106                         
107                         ValidParameters validParameter;
108                         map<string, string>::iterator it;
109             
110                         //check to make sure all parameters are valid for command
111                         for (it = parameters.begin(); it != parameters.end(); it++) { 
112                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
113                         }
114                         
115                         //initialize outputTypes
116                         vector<string> tempOutNames;
117                         outputTypes["database"] = tempOutNames;
118             
119                         //if the user changes the input directory command factory will send this info to us in the output parameter 
120                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
121                         if (inputDir == "not found"){   inputDir = "";          }
122                         else {
123                                 string path;
124                                 it = parameters.find("list");
125                                 //user has given a template file
126                                 if(it != parameters.end()){ 
127                                         path = m->hasPath(it->second);
128                                         //if the user has not given a path then, add inputdir. else leave path alone.
129                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
130                                 }
131                                 
132                                 it = parameters.find("repname");
133                                 //user has given a template file
134                                 if(it != parameters.end()){ 
135                                         path = m->hasPath(it->second);
136                                         //if the user has not given a path then, add inputdir. else leave path alone.
137                                         if (path == "") {       parameters["repname"] = inputDir + it->second;          }
138                                 }
139                                 
140                                 it = parameters.find("contaxonomy");
141                                 //user has given a template file
142                                 if(it != parameters.end()){ 
143                                         path = m->hasPath(it->second);
144                                         //if the user has not given a path then, add inputdir. else leave path alone.
145                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
146                                 }
147                                 
148                                 it = parameters.find("repfasta");
149                                 //user has given a template file
150                                 if(it != parameters.end()){ 
151                                         path = m->hasPath(it->second);
152                                         //if the user has not given a path then, add inputdir. else leave path alone.
153                                         if (path == "") {       parameters["repfasta"] = inputDir + it->second;         }
154                                 }
155                                 
156                                 it = parameters.find("group");
157                                 //user has given a template file
158                                 if(it != parameters.end()){ 
159                                         path = m->hasPath(it->second);
160                                         //if the user has not given a path then, add inputdir. else leave path alone.
161                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
162                                 }
163                 
164                 it = parameters.find("shared");
165                                 //user has given a template file
166                                 if(it != parameters.end()){ 
167                                         path = m->hasPath(it->second);
168                                         //if the user has not given a path then, add inputdir. else leave path alone.
169                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
170                                 }
171                         }
172             
173                         
174                         //if the user changes the output directory command factory will send this info to us in the output parameter 
175                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
176                         
177                         //check for required parameters
178                         listfile = validParameter.validFile(parameters, "list", true);
179                         if (listfile == "not found") {  listfile = "";                  }
180                         else if (listfile == "not open") { listfile = ""; abort = true; }       
181                         else { m->setListFile(listfile); }
182             
183             sharedfile = validParameter.validFile(parameters, "shared", true);
184                         if (sharedfile == "not found") {        sharedfile = "";                        }
185                         else if (sharedfile == "not open") { sharedfile = ""; abort = true; }   
186                         else { m->setSharedFile(sharedfile); }
187             
188             if ((sharedfile == "") && (listfile == "")) { 
189                                 //is there are current file available for either of these?
190                                 //give priority to list, then shared
191                                 listfile = m->getListFile(); 
192                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
193                                 else { 
194                                         sharedfile = m->getSharedFile(); 
195                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
196                                         else { 
197                                                 m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine(); 
198                                                 abort = true;
199                                         }
200                                 }
201                         }
202                         else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; }
203             
204             if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
205             else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
206                         
207                         contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
208                         if (contaxonomyfile == "not found") {  //if there is a current list file, use it
209                contaxonomyfile = "";  m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
210                         }
211                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
212
213             repfastafile = validParameter.validFile(parameters, "repfasta", true);
214                         if (repfastafile == "not found") {  //if there is a current list file, use it
215                 repfastafile = "";  m->mothurOut("The repfasta parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
216                         }
217                         else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
218
219             repnamesfile = validParameter.validFile(parameters, "repname", true);
220                         if (repnamesfile == "not found") {  //if there is a current list file, use it
221                 repnamesfile = "";  m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
222                         }
223                         else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
224
225                         groupfile = validParameter.validFile(parameters, "group", true);
226                         if (groupfile == "not open") { groupfile = ""; abort = true; }  
227                         else if (groupfile == "not found") { groupfile = ""; }
228                         else { m->setGroupFile(groupfile); }
229                         
230                         //check for optional parameter and set defaults
231                         // ...at some point should added some additional type checking...
232             label = validParameter.validFile(parameters, "label", false);                       
233                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your listfile.\n");}
234         }
235         }
236         catch(exception& e) {
237                 m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
238                 exit(1);
239         }
240 }
241 //**********************************************************************************************************************
242 int CreateDatabaseCommand::execute(){
243         try {
244                 
245                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
246         
247         //taxonomies holds the taxonomy info for each Otu
248         //classifyOtuSizes holds the size info of each Otu to help with error checking
249         vector<string> taxonomies;
250         vector<string> otuLabels;
251         vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
252         
253         if (m->control_pressed) { return 0; }
254         
255         vector<Sequence> seqs;
256         vector<int> repOtusSizes = readFasta(seqs);
257         
258         if (m->control_pressed) { return 0; }
259         
260         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
261         map<string, string> repNames;
262         int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
263         
264         //are there the same number of otus in the fasta and name files
265         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
266         
267         if (m->control_pressed) { return 0; }
268         
269         //are there the same number of OTUs in the tax and fasta file
270         if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
271
272         if (m->control_pressed) { return 0; }
273         
274         //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
275         for (int i = 0; i < classifyOtuSizes.size(); i++) {
276             if (classifyOtuSizes[i] != repOtusSizes[i]) {
277                m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ".  These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; 
278             }
279         }
280         
281         if (m->control_pressed) { return 0; }
282         
283         
284         string outputFileName = "";
285         if (listfile != "") { outputFileName = outputDir + m->getRootName(m->getSimpleName(listfile)) + getOutputFileNameTag("database"); }
286         else { outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + getOutputFileNameTag("database"); }
287         outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
288         
289         ofstream out;
290         m->openOutputFile(outputFileName, out);
291         
292         string header = "OTUNumber\tAbundance\t";
293
294         
295         if (listfile != "") {
296             //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
297             ListVector* list = getList();
298             
299             if (otuLabels.size() != list->getNumBins()) { 
300                 m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;  }
301             
302             if (m->control_pressed) { delete list; return 0; }
303             
304             GroupMap* groupmap = NULL;
305             if (groupfile != "") {
306                 groupmap = new GroupMap(groupfile);
307                 groupmap->readMap();
308             }
309             
310             if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
311             
312             if (groupfile != "") { 
313                 header = "OTUNumber\t";
314                 for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
315             }
316             header += "repSeqName\trepSeq\tOTUConTaxonomy";
317             out << header << endl;
318             
319             for (int i = 0; i < list->getNumBins(); i++) {
320                 
321                 if (m->control_pressed) { break; }
322                 
323                 out << otuLabels[i] << '\t';
324                 
325                 vector<string> binNames;
326                 string bin = list->get(i);
327                 
328                 map<string, string>::iterator it = repNames.find(bin);
329                 if (it == repNames.end()) {
330                     m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
331                 }
332                 
333                 m->splitAtComma(bin, binNames);
334                 
335                 //sanity check
336                 if (binNames.size() != classifyOtuSizes[i]) {
337                     m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
338                 }
339                 
340                 //output abundances
341                 if (groupfile != "") {
342                     string groupAbunds = "";
343                     map<string, int> counts;
344                     //initialize counts to 0
345                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
346                     
347                     //find abundances by group
348                     bool error = false;
349                     for (int j = 0; j < binNames.size(); j++) {
350                         string group = groupmap->getGroup(binNames[j]);
351                         if (group == "not found") {
352                             m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
353                             error = true;
354                         }else { counts[group]++; }
355                     }
356                     
357                     //output counts
358                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
359                     
360                     if (error) { m->control_pressed = true; }
361                 }else { out << binNames.size() << '\t'; }
362                 
363                 //output repSeq
364                 out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
365             }
366             
367             
368             delete list;
369             if (groupfile != "") { delete groupmap; }
370            
371         }else {
372             vector<SharedRAbundVector*> lookup = getShared();
373             
374             header = "OTUNumber\t";
375             for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; }
376             header += "repSeqName\trepSeq\tOTUConTaxonomy";
377             out << header << endl;
378             
379             for (int h = 0; h < lookup[0]->getNumBins(); h++) {
380                 
381                 if (m->control_pressed) { break; }
382                 
383                 int index = findIndex(otuLabels, m->currentBinLabels[h]);
384                 if (index == -1) {  m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
385                 
386                 if (m->control_pressed) { break; }
387                 
388                 out << otuLabels[index] << '\t';
389                 
390                 int totalAbund = 0;
391                 for (int i = 0; i < lookup.size(); i++) { 
392                     int abund = lookup[i]->getAbundance(h);
393                     totalAbund += abund; 
394                     out << abund << '\t';
395                 }
396                 
397                 //sanity check
398                 if (totalAbund != classifyOtuSizes[index]) {
399                     m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true;   break;
400                 }
401                 
402                 //output repSeq
403                 out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
404             }
405         }
406         out.close();
407         if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
408         
409         m->mothurOutEndLine();
410                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
411                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
412                 m->mothurOutEndLine();
413         
414         return 0;
415         
416     }
417         catch(exception& e) {
418                 m->errorOut(e, "CreateDatabaseCommand", "execute");
419                 exit(1);
420         }
421 }
422 //**********************************************************************************************************************
423 int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
424         try {
425         int index = -1;
426         for (int i = 0; i < otuLabels.size(); i++) {
427             if (otuLabels[i] == label) { index = i; break; }
428         }
429                 return index;
430     }
431         catch(exception& e) {
432                 m->errorOut(e, "CreateDatabaseCommand", "findIndex");
433                 exit(1);
434         }
435 }
436 //**********************************************************************************************************************
437 vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
438         try {
439                 
440         vector<int> sizes; 
441         
442         ifstream in;
443         m->openInputFile(contaxonomyfile, in);
444         
445         //read headers
446         m->getline(in);
447         
448         while (!in.eof()) {
449             
450             if (m->control_pressed) { break; }
451             
452             string otu = ""; string tax = "unknown";
453             int size = 0;
454             
455             in >> otu >> size >> tax; m->gobble(in);
456             
457             sizes.push_back(size);
458             taxonomies.push_back(tax);
459             otuLabels.push_back(otu);
460         }
461         in.close();
462         
463         return sizes;
464     }
465         catch(exception& e) {
466                 m->errorOut(e, "CreateDatabaseCommand", "readTax");
467                 exit(1);
468         }
469 }
470 //**********************************************************************************************************************
471 vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
472         try {
473                 
474         vector<int> sizes; 
475         
476         ifstream in;
477         m->openInputFile(repfastafile, in);
478         
479         while (!in.eof()) {
480             
481             if (m->control_pressed) { break; }
482             
483             string binInfo;
484             Sequence seq(in, binInfo, true);  m->gobble(in);
485             
486             //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
487             vector<string> info;
488             m->splitAtChar(binInfo, info, '|');
489             if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;}
490             
491             int size = 0;
492             m->mothurConvert(info[1], size);
493             
494             sizes.push_back(size);
495             seqs.push_back(seq);
496         }
497         in.close();
498         
499         return sizes;
500     }
501         catch(exception& e) {
502                 m->errorOut(e, "CreateDatabaseCommand", "readFasta");
503                 exit(1);
504         }
505 }
506 //**********************************************************************************************************************
507 ListVector* CreateDatabaseCommand::getList(){
508         try {
509                 InputData* input = new InputData(listfile, "list");
510                 ListVector* list = input->getListVector();
511                 string lastLabel = list->getLabel();
512                 
513                 if (label == "") { label = lastLabel; delete input; return list; }
514                 
515                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
516                 set<string> labels; labels.insert(label);
517                 set<string> processedLabels;
518                 set<string> userLabels = labels;
519                 
520                 //as long as you are not at the end of the file or done wih the lines you want
521                 while((list != NULL) && (userLabels.size() != 0)) {
522                         if (m->control_pressed) {  delete input; return list;  }
523                         
524                         if(labels.count(list->getLabel()) == 1){
525                                 processedLabels.insert(list->getLabel());
526                                 userLabels.erase(list->getLabel());
527                                 break;
528                         }
529                         
530                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
531                                 string saveLabel = list->getLabel();
532                                 
533                                 delete list;
534                                 list = input->getListVector(lastLabel);
535                                 
536                                 processedLabels.insert(list->getLabel());
537                                 userLabels.erase(list->getLabel());
538                                 
539                                 //restore real lastlabel to save below
540                                 list->setLabel(saveLabel);
541                                 break;
542                         }
543                         
544                         lastLabel = list->getLabel();                   
545                         
546                         //get next line to process
547                         //prevent memory leak
548                         delete list; 
549                         list = input->getListVector();
550                 }
551                 
552                 
553                 if (m->control_pressed) { delete input; return list;  }
554                 
555                 //output error messages about any remaining user labels
556                 set<string>::iterator it;
557                 bool needToRun = false;
558                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
559                         m->mothurOut("Your file does not include the label " + *it); 
560                         if (processedLabels.count(lastLabel) != 1) {
561                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
562                                 needToRun = true;
563                         }else {
564                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
565                         }
566                 }
567                 
568                 //run last label if you need to
569                 if (needToRun == true)  {
570                         delete list;
571                         list = input->getListVector(lastLabel);
572                 }       
573                 
574                 delete input;
575
576         return list;
577     }
578         catch(exception& e) {
579                 m->errorOut(e, "CreateDatabaseCommand", "getList");
580                 exit(1);
581         }
582 }
583 //**********************************************************************************************************************
584 vector<SharedRAbundVector*> CreateDatabaseCommand::getShared(){
585         try {
586                 InputData input(sharedfile, "sharedfile");
587                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
588                 string lastLabel = lookup[0]->getLabel();
589                 
590                 if (label == "") { label = lastLabel; return lookup; }
591                 
592                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
593                 set<string> labels; labels.insert(label);
594                 set<string> processedLabels;
595                 set<string> userLabels = labels;
596                 
597                 //as long as you are not at the end of the file or done wih the lines you want
598                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
599                         if (m->control_pressed) {  return lookup;  }
600                         
601                         if(labels.count(lookup[0]->getLabel()) == 1){
602                                 processedLabels.insert(lookup[0]->getLabel());
603                                 userLabels.erase(lookup[0]->getLabel());
604                                 break;
605                         }
606                         
607                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
608                                 string saveLabel = lookup[0]->getLabel();
609                                 
610                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
611                                 lookup = input.getSharedRAbundVectors(lastLabel);
612                                 
613                                 processedLabels.insert(lookup[0]->getLabel());
614                                 userLabels.erase(lookup[0]->getLabel());
615                                 
616                                 //restore real lastlabel to save below
617                                 lookup[0]->setLabel(saveLabel);
618                                 break;
619                         }
620                         
621                         lastLabel = lookup[0]->getLabel();                      
622                         
623                         //get next line to process
624                         //prevent memory leak
625                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
626                         lookup = input.getSharedRAbundVectors();
627                 }
628                 
629                 
630                 if (m->control_pressed) { return lookup;  }
631                 
632                 //output error messages about any remaining user labels
633                 set<string>::iterator it;
634                 bool needToRun = false;
635                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
636                         m->mothurOut("Your file does not include the label " + *it); 
637                         if (processedLabels.count(lastLabel) != 1) {
638                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
639                                 needToRun = true;
640                         }else {
641                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
642                         }
643                 }
644                 
645                 //run last label if you need to
646                 if (needToRun == true)  {
647                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
648                         lookup = input.getSharedRAbundVectors(lastLabel);
649                 }       
650         
651         return lookup;
652     }
653         catch(exception& e) {
654                 m->errorOut(e, "CreateDatabaseCommand", "getList");
655                 exit(1);
656         }
657 }
658
659 //**********************************************************************************************************************
660
661