]> git.donarmstrong.com Git - mothur.git/blob - makebiomcommand.cpp
changing command name classify.shared to classifyrf.shared
[mothur.git] / makebiomcommand.cpp
1 //
2 //  makebiomcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/16/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makebiomcommand.h"
10 #include "sharedrabundvector.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14 //taken from http://biom-format.org/documentation/biom_format.html
15 /* Minimal Sparse 
16  {
17  "id":null,
18  "format": "Biological Observation Matrix 0.9.1",
19  "format_url": "http://biom-format.org",
20  "type": "OTU table",
21  "generated_by": "QIIME revision 1.4.0-dev",
22  "date": "2011-12-19T19:00:00",
23  "rows":[
24  {"id":"GG_OTU_1", "metadata":null},
25  {"id":"GG_OTU_2", "metadata":null},
26  {"id":"GG_OTU_3", "metadata":null},
27  {"id":"GG_OTU_4", "metadata":null},
28  {"id":"GG_OTU_5", "metadata":null}
29  ],
30  "columns": [
31  {"id":"Sample1", "metadata":null},
32  {"id":"Sample2", "metadata":null},
33  {"id":"Sample3", "metadata":null},
34  {"id":"Sample4", "metadata":null},
35  {"id":"Sample5", "metadata":null},
36  {"id":"Sample6", "metadata":null}
37  ],
38  "matrix_type": "sparse",
39  "matrix_element_type": "int",
40  "shape": [5, 6],
41  "data":[[0,2,1],
42  [1,0,5],
43  [1,1,1],
44  [1,3,2],
45  [1,4,3],
46  [1,5,1],
47  [2,2,1],
48  [2,3,4],
49  [2,4,2],
50  [3,0,2],
51  [3,1,1],
52  [3,2,1],
53  [3,5,1],
54  [4,1,1],
55  [4,2,1]
56  ]
57  }
58  */
59 /* Minimal dense
60  {
61  "id":null,
62  "format": "Biological Observation Matrix 0.9.1",
63  "format_url": "http://biom-format.org",
64  "type": "OTU table",
65  "generated_by": "QIIME revision 1.4.0-dev",
66  "date": "2011-12-19T19:00:00",
67  "rows":[
68  {"id":"GG_OTU_1", "metadata":null},
69  {"id":"GG_OTU_2", "metadata":null},
70  {"id":"GG_OTU_3", "metadata":null},
71  {"id":"GG_OTU_4", "metadata":null},
72  {"id":"GG_OTU_5", "metadata":null}
73  ],
74  "columns": [
75  {"id":"Sample1", "metadata":null},
76  {"id":"Sample2", "metadata":null},
77  {"id":"Sample3", "metadata":null},
78  {"id":"Sample4", "metadata":null},
79  {"id":"Sample5", "metadata":null},
80  {"id":"Sample6", "metadata":null}
81  ],
82  "matrix_type": "dense",
83  "matrix_element_type": "int",
84  "shape": [5,6],
85  "data":  [[0,0,1,0,0,0],
86  [5,1,0,2,3,1],
87  [0,0,1,4,2,0],
88  [2,1,1,0,0,1],
89  [0,1,1,0,0,0]]
90  }
91  */
92 //**********************************************************************************************************************
93 vector<string> MakeBiomCommand::setParameters(){        
94         try {
95                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
96         CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
97         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
98                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
99                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
100                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
101                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
102         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
103
104                 vector<string> myArray;
105                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
106                 return myArray;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "MakeBiomCommand", "setParameters");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 string MakeBiomCommand::getHelpString(){        
115         try {
116                 string helpString = "";
117                 helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
118                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
119                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
120                 helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
121         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
122         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
123                 helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
124                 helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
125                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
126                 helpString += "The make.biom command outputs a .biom file.\n";
127                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
128                 return helpString;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "MakeBiomCommand", "getHelpString");
132                 exit(1);
133         }
134 }
135 //**********************************************************************************************************************
136 string MakeBiomCommand::getOutputPattern(string type) {
137     try {
138         string pattern = "";
139         
140         if (type == "biom") {  pattern = "[filename],[distance],biom"; } 
141         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
142         
143         return pattern;
144     }
145     catch(exception& e) {
146         m->errorOut(e, "MakeBiomCommand", "getOutputPattern");
147         exit(1);
148     }
149 }
150
151 //**********************************************************************************************************************
152 MakeBiomCommand::MakeBiomCommand(){     
153         try {
154                 abort = true; calledHelp = true; 
155                 setParameters();
156                 vector<string> tempOutNames;
157                 outputTypes["biom"] = tempOutNames;
158         }
159         catch(exception& e) {
160                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
161                 exit(1);
162         }
163 }
164 //**********************************************************************************************************************
165 MakeBiomCommand::MakeBiomCommand(string option) {
166         try {
167                 abort = false; calledHelp = false;   
168                 allLines = 1;
169         
170                 //allow user to run help
171                 if(option == "help") { help(); abort = true; calledHelp = true; }
172                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
173                 
174                 else {
175                         vector<string> myArray = setParameters();
176                         
177                         OptionParser parser(option);
178                         map<string,string> parameters = parser.getParameters();
179                         map<string,string>::iterator it;
180                         
181                         ValidParameters validParameter;
182                         
183                         //check to make sure all parameters are valid for command
184                         for (it = parameters.begin(); it != parameters.end(); it++) { 
185                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
186                         }
187
188                         //initialize outputTypes
189                         vector<string> tempOutNames;
190                         outputTypes["biom"] = tempOutNames;
191                         
192                         //if the user changes the input directory command factory will send this info to us in the output parameter 
193                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
194                         if (inputDir == "not found"){   inputDir = "";          }
195                         else {
196                                 string path;
197                                 it = parameters.find("shared");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
203                                 }
204                 
205                 it = parameters.find("contaxonomy");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
211                                 }
212                 
213                 it = parameters.find("metadata");
214                                 //user has given a template file
215                                 if(it != parameters.end()){ 
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
219                                 }
220                         }
221             
222                         //get shared file
223                         sharedfile = validParameter.validFile(parameters, "shared", true);
224                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
225                         else if (sharedfile == "not found") { 
226                                 //if there is a current shared file, use it
227                                 sharedfile = m->getSharedFile(); 
228                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
229                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
230                         }else { m->setSharedFile(sharedfile); }
231                         
232                         
233                         //if the user changes the output directory command factory will send this info to us in the output parameter 
234                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
235             
236             contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
237                         if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
238                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
239
240             metadatafile = validParameter.validFile(parameters, "metadata", true);
241                         if (metadatafile == "not found") {  metadatafile = "";  }
242                         else if (metadatafile == "not open") { metadatafile = ""; abort = true; }
243             
244                         //check for optional parameter and set defaults
245                         // ...at some point should added some additional type checking...
246                         label = validParameter.validFile(parameters, "label", false);                   
247                         if (label == "not found") { label = ""; }
248                         else { 
249                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
250                                 else { allLines = 1;  }
251                         }
252                         
253                         groups = validParameter.validFile(parameters, "groups", false);                 
254                         if (groups == "not found") { groups = ""; }
255                         else { 
256                                 m->splitAtDash(groups, Groups);
257                                 m->setGroups(Groups);
258                         }
259                         
260             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
261             
262                         format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
263                         
264                         if ((format != "sparse") && (format != "dense")) {
265                                 m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense."); m->mothurOutEndLine(); abort = true; 
266                         }
267                 }
268         
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
272                 exit(1);
273         }
274 }
275 //**********************************************************************************************************************
276
277 int MakeBiomCommand::execute(){
278         try {
279         
280                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
281             
282                 InputData input(sharedfile, "sharedfile");
283                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
284                 string lastLabel = lookup[0]->getLabel();
285         
286         //if user did not specify a label, then use first one
287         if ((contaxonomyfile != "") && (labels.size() == 0)) {
288             allLines = 0;
289             labels.insert(lastLabel);
290         }
291                 
292         getSampleMetaData(lookup);
293         
294                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295                 set<string> processedLabels;
296                 set<string> userLabels = labels;
297         
298                 //as long as you are not at the end of the file or done wih the lines you want
299                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
300                         
301                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
302             
303                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
304                 
305                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
306                                 getBiom(lookup);
307                                 
308                                 processedLabels.insert(lookup[0]->getLabel());
309                                 userLabels.erase(lookup[0]->getLabel());
310                         }
311                         
312                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
313                                 string saveLabel = lookup[0]->getLabel();
314                 
315                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
316                                 lookup = input.getSharedRAbundVectors(lastLabel);
317                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
318                                 
319                                 getBiom(lookup);
320                                 
321                                 processedLabels.insert(lookup[0]->getLabel());
322                                 userLabels.erase(lookup[0]->getLabel());
323                                 
324                                 //restore real lastlabel to save below
325                                 lookup[0]->setLabel(saveLabel);
326                         }
327                         
328                         lastLabel = lookup[0]->getLabel();
329             
330                         //prevent memory leak and get next set
331                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
332                         lookup = input.getSharedRAbundVectors();                                
333                 }
334                 
335         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
336         
337                 //output error messages about any remaining user labels
338                 set<string>::iterator it;
339                 bool needToRun = false;
340                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
341                         m->mothurOut("Your file does not include the label " + *it); 
342                         if (processedLabels.count(lastLabel) != 1) {
343                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
344                                 needToRun = true;
345                         }else {
346                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
347                         }
348                 }
349         
350                 //run last label if you need to
351                 if (needToRun == true)  {
352                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
353                         lookup = input.getSharedRAbundVectors(lastLabel);
354                         
355                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
356             getBiom(lookup);
357                         
358                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
359                 }
360                 
361         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
362                 
363         //set sabund file as new current sabundfile
364         string current = "";
365                 itTypes = outputTypes.find("biom");
366                 if (itTypes != outputTypes.end()) {
367                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setBiomFile(current); }
368                 }
369
370         
371                 m->mothurOutEndLine();
372                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
373                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
374                 m->mothurOutEndLine();
375                 
376                 return 0;
377         }
378         catch(exception& e) {
379                 m->errorOut(e, "MakeBiomCommand", "execute");
380                 exit(1);
381         }
382 }
383 //**********************************************************************************************************************
384 int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
385         try {
386         map<string, string> variables; 
387         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
388         variables["[distance]"] = lookup[0]->getLabel();
389         string outputFileName = getOutputFileName("biom",variables);
390                 ofstream out;
391                 m->openOutputFile(outputFileName, out);
392                 outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
393
394         string mothurString = "mothur" + toString(m->getVersion());
395         time_t rawtime;
396         struct tm * timeinfo;
397         time ( &rawtime );
398         timeinfo = localtime ( &rawtime );
399         string dateString = asctime (timeinfo);
400         int pos = dateString.find('\n');
401         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
402         string spaces = "      ";
403         
404         //standard 
405         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
406         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
407         
408         vector<string> metadata = getMetaData(lookup);  
409         
410         if (m->control_pressed) {  out.close(); return 0; }
411         
412         //get row info
413         /*"rows":[
414                 {"id":"GG_OTU_1", "metadata":null},
415                 {"id":"GG_OTU_2", "metadata":null},
416                 {"id":"GG_OTU_3", "metadata":null},
417                 {"id":"GG_OTU_4", "metadata":null},
418                 {"id":"GG_OTU_5", "metadata":null}
419                 ],*/
420         out << spaces + "\"rows\":[\n";
421         string rowFront = spaces + spaces + "{\"id\":\"";
422         string rowBack = "\", \"metadata\":";
423         for (int i = 0; i < m->currentBinLabels.size()-1; i++) {
424             if (m->control_pressed) {  out.close(); return 0; }
425             out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n";
426         }
427         out << rowFront << m->currentBinLabels[(m->currentBinLabels.size()-1)] << rowBack << metadata[(m->currentBinLabels.size()-1)] << "}\n" + spaces + "],\n";
428         
429         //get column info
430         /*"columns": [
431                     {"id":"Sample1", "metadata":null},
432                     {"id":"Sample2", "metadata":null},
433                     {"id":"Sample3", "metadata":null},
434                     {"id":"Sample4", "metadata":null},
435                     {"id":"Sample5", "metadata":null},
436                     {"id":"Sample6", "metadata":null}
437                     ],*/
438         
439         string colBack = "\", \"metadata\":";
440         out << spaces + "\"columns\":[\n";
441         for (int i = 0; i < lookup.size()-1; i++) {
442             if (m->control_pressed) {  out.close(); return 0; }
443             out << rowFront << lookup[i]->getGroup() << colBack << sampleMetadata[i] << "},\n";
444         }
445         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
446         
447         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
448         out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
449         out << spaces + "\"data\":  [";
450         
451         vector<string> dataRows;
452         if (format == "sparse") {
453             /*"data":[[0,2,1],
454              [1,0,5],
455              [1,1,1],
456              [1,3,2],
457              [1,4,3],
458              [1,5,1],
459              [2,2,1],
460              [2,3,4],
461              [2,4,2],
462              [3,0,2],
463              [3,1,1],
464              [3,2,1],
465              [3,5,1],
466              [4,1,1],
467              [4,2,1]
468              ]*/
469             string output = "";
470             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
471                 
472                 if (m->control_pressed) { out.close(); return 0; }
473                 
474                 for (int j = 0; j < lookup.size(); j++) {
475                     string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(lookup[j]->getAbundance(i)) + "]";
476                     //only print non zero values
477                     if (lookup[j]->getAbundance(i) != 0) { dataRows.push_back(binInfo); }
478                 }
479             }
480         }else {
481             
482             /* "matrix_type": "dense",
483              "matrix_element_type": "int",
484              "shape": [5,6],
485              "data":  [[0,0,1,0,0,0],
486              [5,1,0,2,3,1],
487              [0,0,1,4,2,0],
488              [2,1,1,0,0,1],
489              [0,1,1,0,0,0]]*/
490             
491             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
492                 
493                 if (m->control_pressed) { out.close(); return 0; }
494                 
495                 string binInfo = "[";
496                 for (int j = 0; j < lookup.size()-1; j++) {
497                     binInfo += toString(lookup[j]->getAbundance(i)) + ",";
498                 }
499                 binInfo += toString(lookup[lookup.size()-1]->getAbundance(i)) + "]";
500                 dataRows.push_back(binInfo);
501             }
502         }
503         
504         for (int i = 0; i < dataRows.size()-1; i++) {
505             out << dataRows[i] << ",\n" + spaces  + spaces;
506         }
507         out << dataRows[dataRows.size()-1] << "]\n";
508         
509         out << "}\n";
510         out.close();
511         
512         return 0;
513     }
514         catch(exception& e) {
515                 m->errorOut(e, "MakeBiomCommand", "getBiom");
516                 exit(1);
517         }
518 }
519 //**********************************************************************************************************************
520 vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
521         try {
522         vector<string> metadata;
523         
524         if (contaxonomyfile == "") { for (int i = 0; i < lookup[0]->getNumBins(); i++) {  metadata.push_back("null");  } }
525         else {
526             
527             //read constaxonomy file storing in a map, otulabel -> taxonomy
528             //constaxonomy file will most likely contain more labels than the shared file, because sharedfile could have been subsampled.
529             ifstream in;
530             m->openInputFile(contaxonomyfile, in);
531             
532             //grab headers
533             m->getline(in); m->gobble(in);
534             
535             string otuLabel, tax;
536             int size;
537             vector<string> otuLabels;
538             vector<string> taxs;
539             while (!in.eof()) {
540                 
541                 if (m->control_pressed) { in.close(); return metadata; }
542                 
543                 in >> otuLabel >> size >> tax; m->gobble(in);
544                 
545                 otuLabels.push_back(otuLabel);
546                 taxs.push_back(tax);
547             }
548             in.close();
549             
550             //should the labels be Otu001 or PhyloType001
551             string firstBin = m->currentBinLabels[0];
552             string binTag = "Otu";
553             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
554             
555             //convert list file bin labels to shared file bin labels
556             //parse tax strings
557             //save in map
558             map<string, string> labelTaxMap;
559             string snumBins = toString(otuLabels.size());
560             for (int i = 0; i < otuLabels.size(); i++) {  
561                 
562                 if (m->control_pressed) { return metadata; }
563                 
564                 //if there is a bin label use it otherwise make one
565                 if (m->isContainingOnlyDigits(otuLabels[i])) {
566                     string binLabel = binTag;
567                     string sbinNumber = otuLabels[i];
568                     if (sbinNumber.length() < snumBins.length()) { 
569                         int diff = snumBins.length() - sbinNumber.length();
570                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
571                     }
572                     binLabel += sbinNumber;
573                     labelTaxMap[binLabel] = taxs[i];
574                 }else {  labelTaxMap[otuLabels[i]] = taxs[i]; }
575             }
576             
577             
578             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
579             
580             //traverse the binLabels forming the metadata strings and saving them
581             //make sure to sanity check
582             map<string, string>::iterator it;
583             for (int i = 0; i < m->currentBinLabels.size(); i++) {
584                 
585                 if (m->control_pressed) { return metadata; }
586                 
587                 it = labelTaxMap.find(m->currentBinLabels[i]);
588                 
589                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
590                 else {
591                     vector<string> bootstrapValues;
592                     string data = "{\"taxonomy\":[";
593             
594                     vector<string> scores;
595                     vector<string> taxonomies = parseTax(it->second, scores);
596                     
597                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
598                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
599                     
600                     //add bootstrap values if available
601                     if (scores[0] != "null") {
602                         data += ", \"bootstrap\":[";
603                         
604                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
605                         data += scores[scores.size()-1] + "]";
606
607                     }
608                     data += "}";
609                     
610                     metadata.push_back(data);
611                 }
612             }
613         }
614         
615         return metadata;
616         
617     }
618         catch(exception& e) {
619                 m->errorOut(e, "MakeBiomCommand", "getMetadata");
620                 exit(1);
621         }
622
623 }
624 //**********************************************************************************************************************
625 int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
626         try {
627         
628         if (metadatafile == "") { for (int i = 0; i < lookup.size(); i++) {  sampleMetadata.push_back("null");  } }
629         else {
630             ifstream in;
631             m->openInputFile(metadatafile, in);
632             
633             vector<string> groupNames, metadataLabels;
634             map<string, vector<string> > lines;
635             
636             string headerLine = m->getline(in); m->gobble(in);
637             vector<string> pieces = m->splitWhiteSpace(headerLine);
638             
639             //save names of columns you are reading
640             for (int i = 1; i < pieces.size(); i++) {
641                 metadataLabels.push_back(pieces[i]);
642             }
643             int count = metadataLabels.size();
644                         
645             vector<string> groups = m->getGroups();
646             
647             //read rest of file
648             while (!in.eof()) {
649                 
650                 if (m->control_pressed) { in.close(); return 0; }
651                 
652                 string group = "";
653                 in >> group; m->gobble(in);
654                 groupNames.push_back(group);
655                 
656                 string line = m->getline(in); m->gobble(in);
657                 vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
658         
659                 if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); }
660                 else {  if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; } }
661                 
662                 m->gobble(in);
663             }
664             in.close();
665             
666             map<string, vector<string> >::iterator it;
667             for (int i = 0; i < lookup.size(); i++) {
668                 
669                 if (m->control_pressed) { return 0; }
670                 
671                 it = lines.find(lookup[i]->getGroup());
672                 
673                 if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
674                 else {
675                     vector<string> values = it->second;
676                     
677                     string data = "{";
678                     for (int j = 0; j < metadataLabels.size()-1; j++) { 
679                         values[j] = m->removeQuotes(values[j]); 
680                         data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; 
681                     }
682                     values[metadataLabels.size()-1] = m->removeQuotes(values[metadataLabels.size()-1]);
683                     data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}";
684                     sampleMetadata.push_back(data);
685                 }
686             }
687         }
688         
689         return 0;
690         
691     }
692         catch(exception& e) {
693                 m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
694                 exit(1);
695         }
696     
697 }
698
699 /**************************************************************************************************/
700 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
701 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
702         try {
703                 
704                 string taxon;
705         vector<string> taxs;
706                 
707                 while (tax.find_first_of(';') != -1) {
708                         
709                         if (m->control_pressed) { return taxs; }
710                         
711                         //get taxon
712                         taxon = tax.substr(0,tax.find_first_of(';'));
713             
714                         int pos = taxon.find_last_of('(');
715                         if (pos != -1) {
716                                 //is it a number?
717                                 int pos2 = taxon.find_last_of(')');
718                                 if (pos2 != -1) {
719                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
720                                         if (m->isNumeric1(confidenceScore)) {
721                                                 taxon = taxon.substr(0, pos); //rip off confidence 
722                         scores.push_back(confidenceScore);
723                                         }else{ scores.push_back("null"); }
724                                 }
725                         }else{ scores.push_back("null"); }
726                         
727             //strip "" if they are there
728             pos = taxon.find("\"");
729             if (pos != string::npos) {
730                 string newTax = "";
731                 for (int k = 0; k < taxon.length(); k++) {
732                     if (taxon[k] != '\"') { newTax += taxon[k]; }
733                 }
734                 taxon = newTax;
735             }
736             
737             //look for bootstrap value
738                         taxs.push_back(taxon);
739             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
740                 }
741                 
742                 return taxs;
743         }
744         catch(exception& e) {
745                 m->errorOut(e, "MakeBiomCommand", "parseTax");
746                 exit(1);
747         }
748 }
749
750 //**********************************************************************************************************************
751
752
753