]> git.donarmstrong.com Git - mothur.git/blob - createdatabasecommand.cpp
sffinfo bug with flow grams right index when clipQualRight=0
[mothur.git] / createdatabasecommand.cpp
1 //
2 //  createdatabasecommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 3/28/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "createdatabasecommand.h"
10 #include "inputdata.h"
11
12 //**********************************************************************************************************************
13 vector<string> CreateDatabaseCommand::setParameters(){  
14         try {
15                 CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,true,true); parameters.push_back(pfasta);
16                 CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname);
17                 CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy);
18                 CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
19         CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
20                 CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup);
21                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
22                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
23                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
24                 
25                 vector<string> myArray;
26                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
27                 return myArray;
28         }
29         catch(exception& e) {
30                 m->errorOut(e, "CreateDatabaseCommand", "setParameters");
31                 exit(1);
32         }
33 }
34 //**********************************************************************************************************************
35 string CreateDatabaseCommand::getHelpString(){  
36         try {
37                 string helpString = "";
38                 helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
39                 helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
40         helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
41         helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
42         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
43         helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
44         helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
45         helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
46         helpString += "The create.database command should be in the following format: \n";
47                 helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";       
48                 helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
49                 helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";       
50                 return helpString;
51         }
52         catch(exception& e) {
53                 m->errorOut(e, "CreateDatabaseCommand", "getHelpString");
54                 exit(1);
55         }
56 }
57 //**********************************************************************************************************************
58 string CreateDatabaseCommand::getOutputPattern(string type) {
59     try {
60         string pattern = "";
61         
62         if (type == "database") {  pattern = "[filename],database"; }
63         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
64         
65         return pattern;
66     }
67     catch(exception& e) {
68         m->errorOut(e, "CreateDatabaseCommand", "getOutputPattern");
69         exit(1);
70     }
71 }
72 //**********************************************************************************************************************
73 CreateDatabaseCommand::CreateDatabaseCommand(){ 
74         try {
75                 abort = true; calledHelp = true; 
76                 setParameters();
77                 vector<string> tempOutNames;
78                 outputTypes["database"] = tempOutNames;
79         }
80         catch(exception& e) {
81                 m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
82                 exit(1);
83         }
84 }
85
86 //**********************************************************************************************************************
87 CreateDatabaseCommand::CreateDatabaseCommand(string option)  {
88         try{
89                 abort = false; calledHelp = false;   
90         
91                 //allow user to run help
92                 if (option == "help") { 
93                         help(); abort = true; calledHelp = true;
94                 }else if(option == "citation") { citation(); abort = true; calledHelp = true;} 
95                 else {
96                         vector<string> myArray = setParameters();
97                         
98                         OptionParser parser(option);
99                         map<string, string> parameters = parser.getParameters();
100                         
101                         ValidParameters validParameter;
102                         map<string, string>::iterator it;
103             
104                         //check to make sure all parameters are valid for command
105                         for (it = parameters.begin(); it != parameters.end(); it++) { 
106                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
107                         }
108                         
109                         //initialize outputTypes
110                         vector<string> tempOutNames;
111                         outputTypes["database"] = tempOutNames;
112             
113                         //if the user changes the input directory command factory will send this info to us in the output parameter 
114                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
115                         if (inputDir == "not found"){   inputDir = "";          }
116                         else {
117                                 string path;
118                                 it = parameters.find("list");
119                                 //user has given a template file
120                                 if(it != parameters.end()){ 
121                                         path = m->hasPath(it->second);
122                                         //if the user has not given a path then, add inputdir. else leave path alone.
123                                         if (path == "") {       parameters["list"] = inputDir + it->second;             }
124                                 }
125                                 
126                                 it = parameters.find("repname");
127                                 //user has given a template file
128                                 if(it != parameters.end()){ 
129                                         path = m->hasPath(it->second);
130                                         //if the user has not given a path then, add inputdir. else leave path alone.
131                                         if (path == "") {       parameters["repname"] = inputDir + it->second;          }
132                                 }
133                                 
134                                 it = parameters.find("contaxonomy");
135                                 //user has given a template file
136                                 if(it != parameters.end()){ 
137                                         path = m->hasPath(it->second);
138                                         //if the user has not given a path then, add inputdir. else leave path alone.
139                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
140                                 }
141                                 
142                                 it = parameters.find("repfasta");
143                                 //user has given a template file
144                                 if(it != parameters.end()){ 
145                                         path = m->hasPath(it->second);
146                                         //if the user has not given a path then, add inputdir. else leave path alone.
147                                         if (path == "") {       parameters["repfasta"] = inputDir + it->second;         }
148                                 }
149                                 
150                                 it = parameters.find("group");
151                                 //user has given a template file
152                                 if(it != parameters.end()){ 
153                                         path = m->hasPath(it->second);
154                                         //if the user has not given a path then, add inputdir. else leave path alone.
155                                         if (path == "") {       parameters["group"] = inputDir + it->second;            }
156                                 }
157                 
158                 it = parameters.find("shared");
159                                 //user has given a template file
160                                 if(it != parameters.end()){ 
161                                         path = m->hasPath(it->second);
162                                         //if the user has not given a path then, add inputdir. else leave path alone.
163                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
164                                 }
165                         }
166             
167                         
168                         //if the user changes the output directory command factory will send this info to us in the output parameter 
169                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = "";         }
170                         
171                         //check for required parameters
172                         listfile = validParameter.validFile(parameters, "list", true);
173                         if (listfile == "not found") {  listfile = "";                  }
174                         else if (listfile == "not open") { listfile = ""; abort = true; }       
175                         else { m->setListFile(listfile); }
176             
177             sharedfile = validParameter.validFile(parameters, "shared", true);
178                         if (sharedfile == "not found") {        sharedfile = "";                        }
179                         else if (sharedfile == "not open") { sharedfile = ""; abort = true; }   
180                         else { m->setSharedFile(sharedfile); }
181             
182             if ((sharedfile == "") && (listfile == "")) { 
183                                 //is there are current file available for either of these?
184                                 //give priority to list, then shared
185                                 listfile = m->getListFile(); 
186                                 if (listfile != "") {  m->mothurOut("Using " + listfile + " as input file for the list parameter."); m->mothurOutEndLine(); }
187                                 else { 
188                                         sharedfile = m->getSharedFile(); 
189                                         if (sharedfile != "") {  m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
190                                         else { 
191                                                 m->mothurOut("No valid current files. You must provide a shared or list file before you can use the create.database command."); m->mothurOutEndLine(); 
192                                                 abort = true;
193                                         }
194                                 }
195                         }
196                         else if ((sharedfile != "") && (listfile != "")) { m->mothurOut("When executing a create.database command you must enter ONLY ONE of the following: shared or list."); m->mothurOutEndLine(); abort = true; }
197             
198             if (sharedfile != "") { if (outputDir == "") { outputDir = m->hasPath(sharedfile); } }
199             else { if (outputDir == "") { outputDir = m->hasPath(listfile); } }
200                         
201                         contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
202                         if (contaxonomyfile == "not found") {  //if there is a current list file, use it
203                contaxonomyfile = "";  m->mothurOut("The contaxonomy parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
204                         }
205                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
206
207             repfastafile = validParameter.validFile(parameters, "repfasta", true);
208                         if (repfastafile == "not found") {  //if there is a current list file, use it
209                 repfastafile = "";  m->mothurOut("The repfasta parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
210                         }
211                         else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
212
213             repnamesfile = validParameter.validFile(parameters, "repname", true);
214                         if (repnamesfile == "not found") {  //if there is a current list file, use it
215                 repnamesfile = "";  m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true; 
216                         }
217                         else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
218
219                         groupfile = validParameter.validFile(parameters, "group", true);
220                         if (groupfile == "not open") { groupfile = ""; abort = true; }  
221                         else if (groupfile == "not found") { groupfile = ""; }
222                         else { m->setGroupFile(groupfile); }
223                         
224                         //check for optional parameter and set defaults
225                         // ...at some point should added some additional type checking...
226             label = validParameter.validFile(parameters, "label", false);                       
227                         if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your listfile.\n");}
228         }
229         }
230         catch(exception& e) {
231                 m->errorOut(e, "CreateDatabaseCommand", "CreateDatabaseCommand");
232                 exit(1);
233         }
234 }
235 //**********************************************************************************************************************
236 int CreateDatabaseCommand::execute(){
237         try {
238                 
239                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
240         
241         //taxonomies holds the taxonomy info for each Otu
242         //classifyOtuSizes holds the size info of each Otu to help with error checking
243         vector<string> taxonomies;
244         vector<string> otuLabels;
245         vector<int> classifyOtuSizes = readTax(taxonomies, otuLabels);
246         
247         if (m->control_pressed) { return 0; }
248         
249         vector<Sequence> seqs;
250         vector<int> repOtusSizes = readFasta(seqs);
251         
252         if (m->control_pressed) { return 0; }
253         
254         //names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
255         map<string, string> repNames;
256         int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
257         
258         //are there the same number of otus in the fasta and name files
259         if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
260         
261         if (m->control_pressed) { return 0; }
262         
263         //are there the same number of OTUs in the tax and fasta file
264         if (classifyOtuSizes.size() != repOtusSizes.size()) { m->mothurOut("[ERROR]: you have " + toString(classifyOtuSizes.size()) + " taxonomies in your contaxonomy file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file.  These should match.\n"); m->control_pressed = true; }
265
266         if (m->control_pressed) { return 0; }
267         
268         //at this point we have the same number of OTUs. Are the sizes we have found so far accurate?
269         for (int i = 0; i < classifyOtuSizes.size(); i++) {
270             if (classifyOtuSizes[i] != repOtusSizes[i]) {
271                m->mothurOut("[ERROR]: OTU size info does not match for bin " + toString(i+1) + ". The contaxonomy file indicated the OTU represented " + toString(classifyOtuSizes[i]) + " sequences, but the repfasta file had " + toString(repOtusSizes[i]) + ".  These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true; 
272             }
273         }
274         
275         if (m->control_pressed) { return 0; }
276         
277         
278         map<string, string> variables; 
279         if (listfile != "") {  variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(listfile)); }
280         else { variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); }
281         string outputFileName = getOutputFileName("database", variables); 
282         outputNames.push_back(outputFileName); outputTypes["database"].push_back(outputFileName);
283         
284         ofstream out;
285         m->openOutputFile(outputFileName, out);
286         
287         string header = "OTUNumber\tAbundance\t";
288
289         
290         if (listfile != "") {
291             //at this point we are fairly sure the repfasta, repnames and contaxonomy files match so lets proceed with the listfile
292             ListVector* list = getList();
293             
294             if (otuLabels.size() != list->getNumBins()) { 
295                 m->mothurOut("[ERROR]: you have " + toString(otuLabels.size()) + " otus in your contaxonomy file, but your list file has " + toString(list->getNumBins()) + " otus. These should match. Make sure you are using files for the same distance.\n"); m->control_pressed = true;  }
296             
297             if (m->control_pressed) { delete list; return 0; }
298             
299             GroupMap* groupmap = NULL;
300             if (groupfile != "") {
301                 groupmap = new GroupMap(groupfile);
302                 groupmap->readMap();
303             }
304             
305             if (m->control_pressed) { delete list; if (groupfile != "") { delete groupmap; } return 0; }
306             
307             if (groupfile != "") { 
308                 header = "OTUNumber\t";
309                 for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
310             }
311             header += "repSeqName\trepSeq\tOTUConTaxonomy";
312             out << header << endl;
313             
314             for (int i = 0; i < list->getNumBins(); i++) {
315                 
316                 if (m->control_pressed) { break; }
317                 
318                 out << otuLabels[i] << '\t';
319                 
320                 vector<string> binNames;
321                 string bin = list->get(i);
322                 
323                 map<string, string>::iterator it = repNames.find(bin);
324                 if (it == repNames.end()) {
325                     m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
326                 }
327                 
328                 m->splitAtComma(bin, binNames);
329                 
330                 //sanity check
331                 if (binNames.size() != classifyOtuSizes[i]) {
332                     m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true;   break;
333                 }
334                 
335                 //output abundances
336                 if (groupfile != "") {
337                     string groupAbunds = "";
338                     map<string, int> counts;
339                     //initialize counts to 0
340                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { counts[(groupmap->getNamesOfGroups())[j]] = 0; }
341                     
342                     //find abundances by group
343                     bool error = false;
344                     for (int j = 0; j < binNames.size(); j++) {
345                         string group = groupmap->getGroup(binNames[j]);
346                         if (group == "not found") {
347                             m->mothurOut("[ERROR]: " + binNames[j] + " is not in your groupfile, please correct.\n");
348                             error = true;
349                         }else { counts[group]++; }
350                     }
351                     
352                     //output counts
353                     for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t';  }
354                     
355                     if (error) { m->control_pressed = true; }
356                 }else { out << binNames.size() << '\t'; }
357                 
358                 //output repSeq
359                 out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
360             }
361             
362             
363             delete list;
364             if (groupfile != "") { delete groupmap; }
365            
366         }else {
367             vector<SharedRAbundVector*> lookup = getShared();
368             
369             header = "OTUNumber\t";
370             for (int i = 0; i < lookup.size(); i++) { header += lookup[i]->getGroup() + '\t'; }
371             header += "repSeqName\trepSeq\tOTUConTaxonomy";
372             out << header << endl;
373             
374             for (int h = 0; h < lookup[0]->getNumBins(); h++) {
375                 
376                 if (m->control_pressed) { break; }
377                 
378                 int index = findIndex(otuLabels, m->currentBinLabels[h]);
379                 if (index == -1) {  m->mothurOut("[ERROR]: " + m->currentBinLabels[h] + " is not in your constaxonomy file, aborting.\n"); m->control_pressed = true; }
380                 
381                 if (m->control_pressed) { break; }
382                 
383                 out << otuLabels[index] << '\t';
384                 
385                 int totalAbund = 0;
386                 for (int i = 0; i < lookup.size(); i++) { 
387                     int abund = lookup[i]->getAbundance(h);
388                     totalAbund += abund; 
389                     out << abund << '\t';
390                 }
391                 
392                 //sanity check
393                 if (totalAbund != classifyOtuSizes[index]) {
394                     m->mothurOut("[WARNING]: OTU " + m->currentBinLabels[h] + " contains " + toString(totalAbund) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[index]) + ". Make sure you are using files for the same distance.\n"); //m->control_pressed = true;   break;
395                 }
396                 
397                 //output repSeq
398                 out << seqs[index].getName() << '\t' << seqs[index].getAligned() << '\t' << taxonomies[index] << endl;
399             }
400         }
401         out.close();
402         if (m->control_pressed) { m->mothurRemove(outputFileName); return 0; }
403         
404         m->mothurOutEndLine();
405                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
406                 m->mothurOut(outputFileName); m->mothurOutEndLine();    
407                 m->mothurOutEndLine();
408         
409         return 0;
410         
411     }
412         catch(exception& e) {
413                 m->errorOut(e, "CreateDatabaseCommand", "execute");
414                 exit(1);
415         }
416 }
417 //**********************************************************************************************************************
418 int CreateDatabaseCommand::findIndex(vector<string>& otuLabels, string label){
419         try {
420         int index = -1;
421         for (int i = 0; i < otuLabels.size(); i++) {
422             if (otuLabels[i] == label) { index = i; break; }
423         }
424                 return index;
425     }
426         catch(exception& e) {
427                 m->errorOut(e, "CreateDatabaseCommand", "findIndex");
428                 exit(1);
429         }
430 }
431 //**********************************************************************************************************************
432 vector<int> CreateDatabaseCommand::readTax(vector<string>& taxonomies, vector<string>& otuLabels){
433         try {
434                 
435         vector<int> sizes; 
436         
437         ifstream in;
438         m->openInputFile(contaxonomyfile, in);
439         
440         //read headers
441         m->getline(in);
442         
443         while (!in.eof()) {
444             
445             if (m->control_pressed) { break; }
446             
447             string otu = ""; string tax = "unknown";
448             int size = 0;
449             
450             in >> otu >> size >> tax; m->gobble(in);
451             
452             sizes.push_back(size);
453             taxonomies.push_back(tax);
454             otuLabels.push_back(otu);
455         }
456         in.close();
457         
458         return sizes;
459     }
460         catch(exception& e) {
461                 m->errorOut(e, "CreateDatabaseCommand", "readTax");
462                 exit(1);
463         }
464 }
465 //**********************************************************************************************************************
466 vector<int> CreateDatabaseCommand::readFasta(vector<Sequence>& seqs){
467         try {
468                 
469         vector<int> sizes; 
470         
471         ifstream in;
472         m->openInputFile(repfastafile, in);
473         
474         while (!in.eof()) {
475             
476             if (m->control_pressed) { break; }
477             
478             string binInfo;
479             Sequence seq(in, binInfo, true);  m->gobble(in);
480             
481             //the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
482             vector<string> info;
483             m->splitAtChar(binInfo, info, '|');
484             if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format.  The create database command is designed to be used with the output from get.oturep.  When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n");  m->control_pressed = true; break;}
485             
486             int size = 0;
487             m->mothurConvert(info[1], size);
488             
489             sizes.push_back(size);
490             seqs.push_back(seq);
491         }
492         in.close();
493         
494         return sizes;
495     }
496         catch(exception& e) {
497                 m->errorOut(e, "CreateDatabaseCommand", "readFasta");
498                 exit(1);
499         }
500 }
501 //**********************************************************************************************************************
502 ListVector* CreateDatabaseCommand::getList(){
503         try {
504                 InputData* input = new InputData(listfile, "list");
505                 ListVector* list = input->getListVector();
506                 string lastLabel = list->getLabel();
507                 
508                 if (label == "") { label = lastLabel; delete input; return list; }
509                 
510                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
511                 set<string> labels; labels.insert(label);
512                 set<string> processedLabels;
513                 set<string> userLabels = labels;
514                 
515                 //as long as you are not at the end of the file or done wih the lines you want
516                 while((list != NULL) && (userLabels.size() != 0)) {
517                         if (m->control_pressed) {  delete input; return list;  }
518                         
519                         if(labels.count(list->getLabel()) == 1){
520                                 processedLabels.insert(list->getLabel());
521                                 userLabels.erase(list->getLabel());
522                                 break;
523                         }
524                         
525                         if ((m->anyLabelsToProcess(list->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
526                                 string saveLabel = list->getLabel();
527                                 
528                                 delete list;
529                                 list = input->getListVector(lastLabel);
530                                 
531                                 processedLabels.insert(list->getLabel());
532                                 userLabels.erase(list->getLabel());
533                                 
534                                 //restore real lastlabel to save below
535                                 list->setLabel(saveLabel);
536                                 break;
537                         }
538                         
539                         lastLabel = list->getLabel();                   
540                         
541                         //get next line to process
542                         //prevent memory leak
543                         delete list; 
544                         list = input->getListVector();
545                 }
546                 
547                 
548                 if (m->control_pressed) { delete input; return list;  }
549                 
550                 //output error messages about any remaining user labels
551                 set<string>::iterator it;
552                 bool needToRun = false;
553                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
554                         m->mothurOut("Your file does not include the label " + *it); 
555                         if (processedLabels.count(lastLabel) != 1) {
556                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
557                                 needToRun = true;
558                         }else {
559                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
560                         }
561                 }
562                 
563                 //run last label if you need to
564                 if (needToRun == true)  {
565                         delete list;
566                         list = input->getListVector(lastLabel);
567                 }       
568                 
569                 delete input;
570
571         return list;
572     }
573         catch(exception& e) {
574                 m->errorOut(e, "CreateDatabaseCommand", "getList");
575                 exit(1);
576         }
577 }
578 //**********************************************************************************************************************
579 vector<SharedRAbundVector*> CreateDatabaseCommand::getShared(){
580         try {
581                 InputData input(sharedfile, "sharedfile");
582                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
583                 string lastLabel = lookup[0]->getLabel();
584                 
585                 if (label == "") { label = lastLabel; return lookup; }
586                 
587                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
588                 set<string> labels; labels.insert(label);
589                 set<string> processedLabels;
590                 set<string> userLabels = labels;
591                 
592                 //as long as you are not at the end of the file or done wih the lines you want
593                 while((lookup[0] != NULL) && (userLabels.size() != 0)) {
594                         if (m->control_pressed) {  return lookup;  }
595                         
596                         if(labels.count(lookup[0]->getLabel()) == 1){
597                                 processedLabels.insert(lookup[0]->getLabel());
598                                 userLabels.erase(lookup[0]->getLabel());
599                                 break;
600                         }
601                         
602                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
603                                 string saveLabel = lookup[0]->getLabel();
604                                 
605                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
606                                 lookup = input.getSharedRAbundVectors(lastLabel);
607                                 
608                                 processedLabels.insert(lookup[0]->getLabel());
609                                 userLabels.erase(lookup[0]->getLabel());
610                                 
611                                 //restore real lastlabel to save below
612                                 lookup[0]->setLabel(saveLabel);
613                                 break;
614                         }
615                         
616                         lastLabel = lookup[0]->getLabel();                      
617                         
618                         //get next line to process
619                         //prevent memory leak
620                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
621                         lookup = input.getSharedRAbundVectors();
622                 }
623                 
624                 
625                 if (m->control_pressed) { return lookup;  }
626                 
627                 //output error messages about any remaining user labels
628                 set<string>::iterator it;
629                 bool needToRun = false;
630                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
631                         m->mothurOut("Your file does not include the label " + *it); 
632                         if (processedLabels.count(lastLabel) != 1) {
633                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
634                                 needToRun = true;
635                         }else {
636                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
637                         }
638                 }
639                 
640                 //run last label if you need to
641                 if (needToRun == true)  {
642                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
643                         lookup = input.getSharedRAbundVectors(lastLabel);
644                 }       
645         
646         return lookup;
647     }
648         catch(exception& e) {
649                 m->errorOut(e, "CreateDatabaseCommand", "getList");
650                 exit(1);
651         }
652 }
653
654 //**********************************************************************************************************************
655
656