]> git.donarmstrong.com Git - mothur.git/blob - makebiomcommand.cpp
Merge remote-tracking branch 'origin/master'
[mothur.git] / makebiomcommand.cpp
1 //
2 //  makebiomcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/16/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makebiomcommand.h"
10 #include "sharedrabundvector.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14 //taken from http://biom-format.org/documentation/biom_format.html
15 /* Minimal Sparse 
16  {
17  "id":null,
18  "format": "Biological Observation Matrix 0.9.1",
19  "format_url": "http://biom-format.org",
20  "type": "OTU table",
21  "generated_by": "QIIME revision 1.4.0-dev",
22  "date": "2011-12-19T19:00:00",
23  "rows":[
24  {"id":"GG_OTU_1", "metadata":null},
25  {"id":"GG_OTU_2", "metadata":null},
26  {"id":"GG_OTU_3", "metadata":null},
27  {"id":"GG_OTU_4", "metadata":null},
28  {"id":"GG_OTU_5", "metadata":null}
29  ],
30  "columns": [
31  {"id":"Sample1", "metadata":null},
32  {"id":"Sample2", "metadata":null},
33  {"id":"Sample3", "metadata":null},
34  {"id":"Sample4", "metadata":null},
35  {"id":"Sample5", "metadata":null},
36  {"id":"Sample6", "metadata":null}
37  ],
38  "matrix_type": "sparse",
39  "matrix_element_type": "int",
40  "shape": [5, 6],
41  "data":[[0,2,1],
42  [1,0,5],
43  [1,1,1],
44  [1,3,2],
45  [1,4,3],
46  [1,5,1],
47  [2,2,1],
48  [2,3,4],
49  [2,4,2],
50  [3,0,2],
51  [3,1,1],
52  [3,2,1],
53  [3,5,1],
54  [4,1,1],
55  [4,2,1]
56  ]
57  }
58  */
59 /* Minimal dense
60  {
61  "id":null,
62  "format": "Biological Observation Matrix 0.9.1",
63  "format_url": "http://biom-format.org",
64  "type": "OTU table",
65  "generated_by": "QIIME revision 1.4.0-dev",
66  "date": "2011-12-19T19:00:00",
67  "rows":[
68  {"id":"GG_OTU_1", "metadata":null},
69  {"id":"GG_OTU_2", "metadata":null},
70  {"id":"GG_OTU_3", "metadata":null},
71  {"id":"GG_OTU_4", "metadata":null},
72  {"id":"GG_OTU_5", "metadata":null}
73  ],
74  "columns": [
75  {"id":"Sample1", "metadata":null},
76  {"id":"Sample2", "metadata":null},
77  {"id":"Sample3", "metadata":null},
78  {"id":"Sample4", "metadata":null},
79  {"id":"Sample5", "metadata":null},
80  {"id":"Sample6", "metadata":null}
81  ],
82  "matrix_type": "dense",
83  "matrix_element_type": "int",
84  "shape": [5,6],
85  "data":  [[0,0,1,0,0,0],
86  [5,1,0,2,3,1],
87  [0,0,1,4,2,0],
88  [2,1,1,0,0,1],
89  [0,1,1,0,0,0]]
90  }
91  */
92 //**********************************************************************************************************************
93 vector<string> MakeBiomCommand::setParameters(){        
94         try {
95                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
96         CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcontaxonomy);
97         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pmetadata);
98                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
99                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
100                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
101                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
102         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "",false,false); parameters.push_back(pmatrixtype);
103
104                 vector<string> myArray;
105                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
106                 return myArray;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "MakeBiomCommand", "setParameters");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 string MakeBiomCommand::getHelpString(){        
115         try {
116                 string helpString = "";
117                 helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
118                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
119                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
120                 helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
121         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
122         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
123                 helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
124                 helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
125                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
126                 helpString += "The make.biom command outputs a .biom file.\n";
127                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
128                 return helpString;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "MakeBiomCommand", "getHelpString");
132                 exit(1);
133         }
134 }
135 //**********************************************************************************************************************
136 string MakeBiomCommand::getOutputFileNameTag(string type, string inputName=""){ 
137         try {
138         string outputFileName = "";
139                 map<string, vector<string> >::iterator it;
140         
141         //is this a type this command creates
142         it = outputTypes.find(type);
143         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
144         else {
145             if (type == "biom")             {   outputFileName =  "biom";       }
146             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
147         }
148         return outputFileName;
149         }
150         catch(exception& e) {
151                 m->errorOut(e, "MakeBiomCommand", "getOutputFileNameTag");
152                 exit(1);
153         }
154 }
155
156 //**********************************************************************************************************************
157 MakeBiomCommand::MakeBiomCommand(){     
158         try {
159                 abort = true; calledHelp = true; 
160                 setParameters();
161                 vector<string> tempOutNames;
162                 outputTypes["biom"] = tempOutNames;
163         }
164         catch(exception& e) {
165                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
166                 exit(1);
167         }
168 }
169 //**********************************************************************************************************************
170 MakeBiomCommand::MakeBiomCommand(string option) {
171         try {
172                 abort = false; calledHelp = false;   
173                 allLines = 1;
174         
175                 //allow user to run help
176                 if(option == "help") { help(); abort = true; calledHelp = true; }
177                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
178                 
179                 else {
180                         vector<string> myArray = setParameters();
181                         
182                         OptionParser parser(option);
183                         map<string,string> parameters = parser.getParameters();
184                         map<string,string>::iterator it;
185                         
186                         ValidParameters validParameter;
187                         
188                         //check to make sure all parameters are valid for command
189                         for (it = parameters.begin(); it != parameters.end(); it++) { 
190                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
191                         }
192
193                         //initialize outputTypes
194                         vector<string> tempOutNames;
195                         outputTypes["biom"] = tempOutNames;
196                         
197                         //if the user changes the input directory command factory will send this info to us in the output parameter 
198                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
199                         if (inputDir == "not found"){   inputDir = "";          }
200                         else {
201                                 string path;
202                                 it = parameters.find("shared");
203                                 //user has given a template file
204                                 if(it != parameters.end()){ 
205                                         path = m->hasPath(it->second);
206                                         //if the user has not given a path then, add inputdir. else leave path alone.
207                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
208                                 }
209                 
210                 it = parameters.find("contaxonomy");
211                                 //user has given a template file
212                                 if(it != parameters.end()){ 
213                                         path = m->hasPath(it->second);
214                                         //if the user has not given a path then, add inputdir. else leave path alone.
215                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
216                                 }
217                 
218                 it = parameters.find("metadata");
219                                 //user has given a template file
220                                 if(it != parameters.end()){ 
221                                         path = m->hasPath(it->second);
222                                         //if the user has not given a path then, add inputdir. else leave path alone.
223                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
224                                 }
225                         }
226             
227                         //get shared file
228                         sharedfile = validParameter.validFile(parameters, "shared", true);
229                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
230                         else if (sharedfile == "not found") { 
231                                 //if there is a current shared file, use it
232                                 sharedfile = m->getSharedFile(); 
233                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
234                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
235                         }else { m->setSharedFile(sharedfile); }
236                         
237                         
238                         //if the user changes the output directory command factory will send this info to us in the output parameter 
239                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
240             
241             contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
242                         if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
243                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
244
245             metadatafile = validParameter.validFile(parameters, "metadata", true);
246                         if (metadatafile == "not found") {  metadatafile = "";  }
247                         else if (metadatafile == "not open") { metadatafile = ""; abort = true; }
248             
249                         //check for optional parameter and set defaults
250                         // ...at some point should added some additional type checking...
251                         label = validParameter.validFile(parameters, "label", false);                   
252                         if (label == "not found") { label = ""; }
253                         else { 
254                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
255                                 else { allLines = 1;  }
256                         }
257                         
258                         groups = validParameter.validFile(parameters, "groups", false);                 
259                         if (groups == "not found") { groups = ""; }
260                         else { 
261                                 m->splitAtDash(groups, Groups);
262                                 m->setGroups(Groups);
263                         }
264                         
265             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
266             
267                         format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
268                         
269                         if ((format != "sparse") && (format != "dense")) {
270                                 m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense."); m->mothurOutEndLine(); abort = true; 
271                         }
272                 }
273         
274         }
275         catch(exception& e) {
276                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
277                 exit(1);
278         }
279 }
280 //**********************************************************************************************************************
281
282 int MakeBiomCommand::execute(){
283         try {
284         
285                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
286             
287                 InputData input(sharedfile, "sharedfile");
288                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
289                 string lastLabel = lookup[0]->getLabel();
290         
291         //if user did not specify a label, then use first one
292         if ((contaxonomyfile != "") && (labels.size() == 0)) {
293             allLines = 0;
294             labels.insert(lastLabel);
295         }
296                 
297         getSampleMetaData(lookup);
298         
299                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
300                 set<string> processedLabels;
301                 set<string> userLabels = labels;
302         
303                 //as long as you are not at the end of the file or done wih the lines you want
304                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
305                         
306                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
307             
308                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
309                 
310                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
311                                 getBiom(lookup);
312                                 
313                                 processedLabels.insert(lookup[0]->getLabel());
314                                 userLabels.erase(lookup[0]->getLabel());
315                         }
316                         
317                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
318                                 string saveLabel = lookup[0]->getLabel();
319                 
320                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
321                                 lookup = input.getSharedRAbundVectors(lastLabel);
322                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
323                                 
324                                 getBiom(lookup);
325                                 
326                                 processedLabels.insert(lookup[0]->getLabel());
327                                 userLabels.erase(lookup[0]->getLabel());
328                                 
329                                 //restore real lastlabel to save below
330                                 lookup[0]->setLabel(saveLabel);
331                         }
332                         
333                         lastLabel = lookup[0]->getLabel();
334             
335                         //prevent memory leak and get next set
336                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
337                         lookup = input.getSharedRAbundVectors();                                
338                 }
339                 
340         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
341         
342                 //output error messages about any remaining user labels
343                 set<string>::iterator it;
344                 bool needToRun = false;
345                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
346                         m->mothurOut("Your file does not include the label " + *it); 
347                         if (processedLabels.count(lastLabel) != 1) {
348                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
349                                 needToRun = true;
350                         }else {
351                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
352                         }
353                 }
354         
355                 //run last label if you need to
356                 if (needToRun == true)  {
357                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
358                         lookup = input.getSharedRAbundVectors(lastLabel);
359                         
360                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
361             getBiom(lookup);
362                         
363                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
364                 }
365                 
366         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
367                 
368         //set sabund file as new current sabundfile
369         string current = "";
370                 itTypes = outputTypes.find("biom");
371                 if (itTypes != outputTypes.end()) {
372                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setBiomFile(current); }
373                 }
374
375         
376                 m->mothurOutEndLine();
377                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
378                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
379                 m->mothurOutEndLine();
380                 
381                 return 0;
382         }
383         catch(exception& e) {
384                 m->errorOut(e, "MakeBiomCommand", "execute");
385                 exit(1);
386         }
387 }
388 //**********************************************************************************************************************
389 int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
390         try {
391         
392         string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("biom");
393                 ofstream out;
394                 m->openOutputFile(outputFileName, out);
395                 outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
396
397         string mothurString = "mothur" + toString(m->getVersion());
398         time_t rawtime;
399         struct tm * timeinfo;
400         time ( &rawtime );
401         timeinfo = localtime ( &rawtime );
402         string dateString = asctime (timeinfo);
403         int pos = dateString.find('\n');
404         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
405         string spaces = "      ";
406         
407         //standard 
408         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
409         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
410         
411         vector<string> metadata = getMetaData(lookup);  
412         
413         if (m->control_pressed) {  out.close(); return 0; }
414         
415         //get row info
416         /*"rows":[
417                 {"id":"GG_OTU_1", "metadata":null},
418                 {"id":"GG_OTU_2", "metadata":null},
419                 {"id":"GG_OTU_3", "metadata":null},
420                 {"id":"GG_OTU_4", "metadata":null},
421                 {"id":"GG_OTU_5", "metadata":null}
422                 ],*/
423         out << spaces + "\"rows\":[\n";
424         string rowFront = spaces + spaces + "{\"id\":\"";
425         string rowBack = "\", \"metadata\":";
426         for (int i = 0; i < m->currentBinLabels.size()-1; i++) {
427             if (m->control_pressed) {  out.close(); return 0; }
428             out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n";
429         }
430         out << rowFront << m->currentBinLabels[(m->currentBinLabels.size()-1)] << rowBack << metadata[(m->currentBinLabels.size()-1)] << "}\n" + spaces + "],\n";
431         
432         //get column info
433         /*"columns": [
434                     {"id":"Sample1", "metadata":null},
435                     {"id":"Sample2", "metadata":null},
436                     {"id":"Sample3", "metadata":null},
437                     {"id":"Sample4", "metadata":null},
438                     {"id":"Sample5", "metadata":null},
439                     {"id":"Sample6", "metadata":null}
440                     ],*/
441         
442         string colBack = "\", \"metadata\":";
443         out << spaces + "\"columns\":[\n";
444         for (int i = 0; i < lookup.size()-1; i++) {
445             if (m->control_pressed) {  out.close(); return 0; }
446             out << rowFront << lookup[i]->getGroup() << colBack << sampleMetadata[i] << "},\n";
447         }
448         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
449         
450         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
451         out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
452         out << spaces + "\"data\":  [";
453         
454         vector<string> dataRows;
455         if (format == "sparse") {
456             /*"data":[[0,2,1],
457              [1,0,5],
458              [1,1,1],
459              [1,3,2],
460              [1,4,3],
461              [1,5,1],
462              [2,2,1],
463              [2,3,4],
464              [2,4,2],
465              [3,0,2],
466              [3,1,1],
467              [3,2,1],
468              [3,5,1],
469              [4,1,1],
470              [4,2,1]
471              ]*/
472             string output = "";
473             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
474                 
475                 if (m->control_pressed) { out.close(); return 0; }
476                 
477                 for (int j = 0; j < lookup.size(); j++) {
478                     string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(lookup[j]->getAbundance(i)) + "]";
479                     //only print non zero values
480                     if (lookup[j]->getAbundance(i) != 0) { dataRows.push_back(binInfo); }
481                 }
482             }
483         }else {
484             
485             /* "matrix_type": "dense",
486              "matrix_element_type": "int",
487              "shape": [5,6],
488              "data":  [[0,0,1,0,0,0],
489              [5,1,0,2,3,1],
490              [0,0,1,4,2,0],
491              [2,1,1,0,0,1],
492              [0,1,1,0,0,0]]*/
493             
494             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
495                 
496                 if (m->control_pressed) { out.close(); return 0; }
497                 
498                 string binInfo = "[";
499                 for (int j = 0; j < lookup.size()-1; j++) {
500                     binInfo += toString(lookup[j]->getAbundance(i)) + ",";
501                 }
502                 binInfo += toString(lookup[lookup.size()-1]->getAbundance(i)) + "]";
503                 dataRows.push_back(binInfo);
504             }
505         }
506         
507         for (int i = 0; i < dataRows.size()-1; i++) {
508             out << dataRows[i] << ",\n" + spaces  + spaces;
509         }
510         out << dataRows[dataRows.size()-1] << "]\n";
511         
512         out << "}\n";
513         out.close();
514         
515         return 0;
516     }
517         catch(exception& e) {
518                 m->errorOut(e, "MakeBiomCommand", "getBiom");
519                 exit(1);
520         }
521 }
522 //**********************************************************************************************************************
523 vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
524         try {
525         vector<string> metadata;
526         
527         if (contaxonomyfile == "") { for (int i = 0; i < lookup[0]->getNumBins(); i++) {  metadata.push_back("null");  } }
528         else {
529             
530             //read constaxonomy file storing in a map, otulabel -> taxonomy
531             //constaxonomy file will most likely contain more labels than the shared file, because sharedfile could have been subsampled.
532             ifstream in;
533             m->openInputFile(contaxonomyfile, in);
534             
535             //grab headers
536             m->getline(in); m->gobble(in);
537             
538             string otuLabel, tax;
539             int size;
540             vector<string> otuLabels;
541             vector<string> taxs;
542             while (!in.eof()) {
543                 
544                 if (m->control_pressed) { in.close(); return metadata; }
545                 
546                 in >> otuLabel >> size >> tax; m->gobble(in);
547                 
548                 otuLabels.push_back(otuLabel);
549                 taxs.push_back(tax);
550             }
551             in.close();
552             
553             //should the labels be Otu001 or PhyloType001
554             string firstBin = m->currentBinLabels[0];
555             string binTag = "Otu";
556             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
557             
558             //convert list file bin labels to shared file bin labels
559             //parse tax strings
560             //save in map
561             map<string, string> labelTaxMap;
562             string snumBins = toString(otuLabels.size());
563             for (int i = 0; i < otuLabels.size(); i++) {  
564                 
565                 if (m->control_pressed) { return metadata; }
566                 
567                 //if there is a bin label use it otherwise make one
568                 if (m->isContainingOnlyDigits(otuLabels[i])) {
569                     string binLabel = binTag;
570                     string sbinNumber = otuLabels[i];
571                     if (sbinNumber.length() < snumBins.length()) { 
572                         int diff = snumBins.length() - sbinNumber.length();
573                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
574                     }
575                     binLabel += sbinNumber;
576                     labelTaxMap[binLabel] = taxs[i];
577                 }else {  labelTaxMap[otuLabels[i]] = taxs[i]; }
578             }
579             
580             
581             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
582             
583             //traverse the binLabels forming the metadata strings and saving them
584             //make sure to sanity check
585             map<string, string>::iterator it;
586             for (int i = 0; i < m->currentBinLabels.size(); i++) {
587                 
588                 if (m->control_pressed) { return metadata; }
589                 
590                 it = labelTaxMap.find(m->currentBinLabels[i]);
591                 
592                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
593                 else {
594                     vector<string> bootstrapValues;
595                     string data = "{\"taxonomy\":[";
596             
597                     vector<string> scores;
598                     vector<string> taxonomies = parseTax(it->second, scores);
599                     
600                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
601                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
602                     
603                     //add bootstrap values if available
604                     if (scores[0] != "null") {
605                         data += ", \"bootstrap\":[";
606                         
607                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
608                         data += scores[scores.size()-1] + "]";
609
610                     }
611                     data += "}";
612                     
613                     metadata.push_back(data);
614                 }
615             }
616         }
617         
618         return metadata;
619         
620     }
621         catch(exception& e) {
622                 m->errorOut(e, "MakeBiomCommand", "getMetadata");
623                 exit(1);
624         }
625
626 }
627 //**********************************************************************************************************************
628 int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
629         try {
630         
631         if (metadatafile == "") { for (int i = 0; i < lookup.size(); i++) {  sampleMetadata.push_back("null");  } }
632         else {
633             ifstream in;
634             m->openInputFile(metadatafile, in);
635             
636             vector<string> groupNames, metadataLabels;
637             map<string, vector<string> > lines;
638             
639             string headerLine = m->getline(in); m->gobble(in);
640             vector<string> pieces = m->splitWhiteSpace(headerLine);
641             
642             //save names of columns you are reading
643             for (int i = 1; i < pieces.size(); i++) {
644                 metadataLabels.push_back(pieces[i]);
645             }
646             int count = metadataLabels.size();
647                         
648             vector<string> groups = m->getGroups();
649             
650             //read rest of file
651             while (!in.eof()) {
652                 
653                 if (m->control_pressed) { in.close(); return 0; }
654                 
655                 string group = "";
656                 in >> group; m->gobble(in);
657                 groupNames.push_back(group);
658                 
659                 string line = m->getline(in); m->gobble(in);
660                 vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
661         
662                 if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); }
663                 else {  if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; } }
664                 
665                 m->gobble(in);
666             }
667             in.close();
668             
669             map<string, vector<string> >::iterator it;
670             for (int i = 0; i < lookup.size(); i++) {
671                 
672                 if (m->control_pressed) { return 0; }
673                 
674                 it = lines.find(lookup[i]->getGroup());
675                 
676                 if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
677                 else {
678                     vector<string> values = it->second;
679                     
680                     string data = "{";
681                     for (int j = 0; j < metadataLabels.size()-1; j++) { 
682                         values[j] = m->removeQuotes(values[j]); 
683                         data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; 
684                     }
685                     values[metadataLabels.size()-1] = m->removeQuotes(values[metadataLabels.size()-1]);
686                     data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}";
687                     sampleMetadata.push_back(data);
688                 }
689             }
690         }
691         
692         return 0;
693         
694     }
695         catch(exception& e) {
696                 m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
697                 exit(1);
698         }
699     
700 }
701
702 /**************************************************************************************************/
703 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
704 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
705         try {
706                 
707                 string taxon;
708         vector<string> taxs;
709                 
710                 while (tax.find_first_of(';') != -1) {
711                         
712                         if (m->control_pressed) { return taxs; }
713                         
714                         //get taxon
715                         taxon = tax.substr(0,tax.find_first_of(';'));
716             
717                         int pos = taxon.find_last_of('(');
718                         if (pos != -1) {
719                                 //is it a number?
720                                 int pos2 = taxon.find_last_of(')');
721                                 if (pos2 != -1) {
722                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
723                                         if (m->isNumeric1(confidenceScore)) {
724                                                 taxon = taxon.substr(0, pos); //rip off confidence 
725                         scores.push_back(confidenceScore);
726                                         }else{ scores.push_back("null"); }
727                                 }
728                         }else{ scores.push_back("null"); }
729                         
730             //strip "" if they are there
731             pos = taxon.find("\"");
732             if (pos != string::npos) {
733                 string newTax = "";
734                 for (int k = 0; k < taxon.length(); k++) {
735                     if (taxon[k] != '\"') { newTax += taxon[k]; }
736                 }
737                 taxon = newTax;
738             }
739             
740             //look for bootstrap value
741                         taxs.push_back(taxon);
742             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
743                 }
744                 
745                 return taxs;
746         }
747         catch(exception& e) {
748                 m->errorOut(e, "MakeBiomCommand", "parseTax");
749                 exit(1);
750         }
751 }
752
753 //**********************************************************************************************************************
754
755
756