]> git.donarmstrong.com Git - mothur.git/blob - makebiomcommand.cpp
a1018833e6268b6d0fe04aa25d4598f37df9cb4d
[mothur.git] / makebiomcommand.cpp
1 //
2 //  makebiomcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/16/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makebiomcommand.h"
10 #include "sharedrabundvector.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14 //taken from http://biom-format.org/documentation/biom_format.html
15 /* Minimal Sparse 
16  {
17  "id":null,
18  "format": "Biological Observation Matrix 0.9.1",
19  "format_url": "http://biom-format.org",
20  "type": "OTU table",
21  "generated_by": "QIIME revision 1.4.0-dev",
22  "date": "2011-12-19T19:00:00",
23  "rows":[
24  {"id":"GG_OTU_1", "metadata":null},
25  {"id":"GG_OTU_2", "metadata":null},
26  {"id":"GG_OTU_3", "metadata":null},
27  {"id":"GG_OTU_4", "metadata":null},
28  {"id":"GG_OTU_5", "metadata":null}
29  ],
30  "columns": [
31  {"id":"Sample1", "metadata":null},
32  {"id":"Sample2", "metadata":null},
33  {"id":"Sample3", "metadata":null},
34  {"id":"Sample4", "metadata":null},
35  {"id":"Sample5", "metadata":null},
36  {"id":"Sample6", "metadata":null}
37  ],
38  "matrix_type": "sparse",
39  "matrix_element_type": "int",
40  "shape": [5, 6],
41  "data":[[0,2,1],
42  [1,0,5],
43  [1,1,1],
44  [1,3,2],
45  [1,4,3],
46  [1,5,1],
47  [2,2,1],
48  [2,3,4],
49  [2,4,2],
50  [3,0,2],
51  [3,1,1],
52  [3,2,1],
53  [3,5,1],
54  [4,1,1],
55  [4,2,1]
56  ]
57  }
58  */
59 /* Minimal dense
60  {
61  "id":null,
62  "format": "Biological Observation Matrix 0.9.1",
63  "format_url": "http://biom-format.org",
64  "type": "OTU table",
65  "generated_by": "QIIME revision 1.4.0-dev",
66  "date": "2011-12-19T19:00:00",
67  "rows":[
68  {"id":"GG_OTU_1", "metadata":null},
69  {"id":"GG_OTU_2", "metadata":null},
70  {"id":"GG_OTU_3", "metadata":null},
71  {"id":"GG_OTU_4", "metadata":null},
72  {"id":"GG_OTU_5", "metadata":null}
73  ],
74  "columns": [
75  {"id":"Sample1", "metadata":null},
76  {"id":"Sample2", "metadata":null},
77  {"id":"Sample3", "metadata":null},
78  {"id":"Sample4", "metadata":null},
79  {"id":"Sample5", "metadata":null},
80  {"id":"Sample6", "metadata":null}
81  ],
82  "matrix_type": "dense",
83  "matrix_element_type": "int",
84  "shape": [5,6],
85  "data":  [[0,0,1,0,0,0],
86  [5,1,0,2,3,1],
87  [0,0,1,4,2,0],
88  [2,1,1,0,0,1],
89  [0,1,1,0,0,0]]
90  }
91  */
92 //**********************************************************************************************************************
93 vector<string> MakeBiomCommand::setParameters(){        
94         try {
95                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
96         CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
97         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
98                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
99                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
100                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
101                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
102         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
103
104                 vector<string> myArray;
105                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
106                 return myArray;
107         }
108         catch(exception& e) {
109                 m->errorOut(e, "MakeBiomCommand", "setParameters");
110                 exit(1);
111         }
112 }
113 //**********************************************************************************************************************
114 string MakeBiomCommand::getHelpString(){        
115         try {
116                 string helpString = "";
117                 helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
118                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
119                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
120                 helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
121         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
122         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
123                 helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
124                 helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
125                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
126                 helpString += "The make.biom command outputs a .biom file.\n";
127                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
128                 return helpString;
129         }
130         catch(exception& e) {
131                 m->errorOut(e, "MakeBiomCommand", "getHelpString");
132                 exit(1);
133         }
134 }
135 //**********************************************************************************************************************
136 string MakeBiomCommand::getOutputPattern(string type) {
137     try {
138         string pattern = "";
139         
140         if (type == "biom") {  pattern = "[filename],[distance],biom"; } 
141         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
142         
143         return pattern;
144     }
145     catch(exception& e) {
146         m->errorOut(e, "MakeBiomCommand", "getOutputPattern");
147         exit(1);
148     }
149 }
150
151 //**********************************************************************************************************************
152 MakeBiomCommand::MakeBiomCommand(){     
153         try {
154                 abort = true; calledHelp = true; 
155                 setParameters();
156                 vector<string> tempOutNames;
157                 outputTypes["biom"] = tempOutNames;
158         }
159         catch(exception& e) {
160                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
161                 exit(1);
162         }
163 }
164 //**********************************************************************************************************************
165 MakeBiomCommand::MakeBiomCommand(string option) {
166         try {
167                 abort = false; calledHelp = false;   
168                 allLines = 1;
169         
170                 //allow user to run help
171                 if(option == "help") { help(); abort = true; calledHelp = true; }
172                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
173                 
174                 else {
175                         vector<string> myArray = setParameters();
176                         
177                         OptionParser parser(option);
178                         map<string,string> parameters = parser.getParameters();
179                         map<string,string>::iterator it;
180                         
181                         ValidParameters validParameter;
182                         
183                         //check to make sure all parameters are valid for command
184                         for (it = parameters.begin(); it != parameters.end(); it++) { 
185                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
186                         }
187
188                         //initialize outputTypes
189                         vector<string> tempOutNames;
190                         outputTypes["biom"] = tempOutNames;
191                         
192                         //if the user changes the input directory command factory will send this info to us in the output parameter 
193                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
194                         if (inputDir == "not found"){   inputDir = "";          }
195                         else {
196                                 string path;
197                                 it = parameters.find("shared");
198                                 //user has given a template file
199                                 if(it != parameters.end()){ 
200                                         path = m->hasPath(it->second);
201                                         //if the user has not given a path then, add inputdir. else leave path alone.
202                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
203                                 }
204                 
205                 it = parameters.find("contaxonomy");
206                                 //user has given a template file
207                                 if(it != parameters.end()){ 
208                                         path = m->hasPath(it->second);
209                                         //if the user has not given a path then, add inputdir. else leave path alone.
210                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
211                                 }
212                 
213                 it = parameters.find("metadata");
214                                 //user has given a template file
215                                 if(it != parameters.end()){ 
216                                         path = m->hasPath(it->second);
217                                         //if the user has not given a path then, add inputdir. else leave path alone.
218                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
219                                 }
220                         }
221             
222                         //get shared file
223                         sharedfile = validParameter.validFile(parameters, "shared", true);
224                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
225                         else if (sharedfile == "not found") { 
226                                 //if there is a current shared file, use it
227                                 sharedfile = m->getSharedFile(); 
228                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
229                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
230                         }else { m->setSharedFile(sharedfile); }
231                         
232                         
233                         //if the user changes the output directory command factory will send this info to us in the output parameter 
234                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
235             
236             contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
237                         if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
238                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
239
240             metadatafile = validParameter.validFile(parameters, "metadata", true);
241                         if (metadatafile == "not found") {  metadatafile = "";  }
242                         else if (metadatafile == "not open") { metadatafile = ""; abort = true; }
243             
244                         //check for optional parameter and set defaults
245                         // ...at some point should added some additional type checking...
246                         label = validParameter.validFile(parameters, "label", false);                   
247                         if (label == "not found") { label = ""; }
248                         else { 
249                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
250                                 else { allLines = 1;  }
251                         }
252                         
253                         groups = validParameter.validFile(parameters, "groups", false);                 
254                         if (groups == "not found") { groups = ""; }
255                         else { 
256                                 m->splitAtDash(groups, Groups);
257                                 m->setGroups(Groups);
258                         }
259                         
260             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
261             
262                         format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
263                         
264                         if ((format != "sparse") && (format != "dense")) {
265                                 m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense."); m->mothurOutEndLine(); abort = true; 
266                         }
267                 }
268         
269         }
270         catch(exception& e) {
271                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
272                 exit(1);
273         }
274 }
275 //**********************************************************************************************************************
276
277 int MakeBiomCommand::execute(){
278         try {
279         
280                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
281             
282                 InputData input(sharedfile, "sharedfile");
283                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
284                 string lastLabel = lookup[0]->getLabel();
285         
286         //if user did not specify a label, then use first one
287         if ((contaxonomyfile != "") && (labels.size() == 0)) {
288             allLines = 0;
289             labels.insert(lastLabel);
290         }
291                 
292         getSampleMetaData(lookup);
293         
294                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
295                 set<string> processedLabels;
296                 set<string> userLabels = labels;
297         
298                 //as long as you are not at the end of the file or done wih the lines you want
299                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
300                         
301                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
302             
303                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
304                 
305                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
306                                 getBiom(lookup);
307                                 
308                                 processedLabels.insert(lookup[0]->getLabel());
309                                 userLabels.erase(lookup[0]->getLabel());
310                         }
311                         
312                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
313                                 string saveLabel = lookup[0]->getLabel();
314                 
315                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
316                                 lookup = input.getSharedRAbundVectors(lastLabel);
317                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
318                                 
319                                 getBiom(lookup);
320                                 
321                                 processedLabels.insert(lookup[0]->getLabel());
322                                 userLabels.erase(lookup[0]->getLabel());
323                                 
324                                 //restore real lastlabel to save below
325                                 lookup[0]->setLabel(saveLabel);
326                         }
327                         
328                         lastLabel = lookup[0]->getLabel();
329             
330                         //prevent memory leak and get next set
331                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
332                         lookup = input.getSharedRAbundVectors();                                
333                 }
334                 
335         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
336         
337                 //output error messages about any remaining user labels
338                 set<string>::iterator it;
339                 bool needToRun = false;
340                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
341                         m->mothurOut("Your file does not include the label " + *it); 
342                         if (processedLabels.count(lastLabel) != 1) {
343                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
344                                 needToRun = true;
345                         }else {
346                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
347                         }
348                 }
349         
350                 //run last label if you need to
351                 if (needToRun == true)  {
352                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
353                         lookup = input.getSharedRAbundVectors(lastLabel);
354                         
355                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
356             getBiom(lookup);
357                         
358                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
359                 }
360                 
361         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
362                 
363         //set sabund file as new current sabundfile
364         string current = "";
365                 itTypes = outputTypes.find("biom");
366                 if (itTypes != outputTypes.end()) {
367                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setBiomFile(current); }
368                 }
369
370         
371                 m->mothurOutEndLine();
372                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
373                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
374                 m->mothurOutEndLine();
375                 
376                 return 0;
377         }
378         catch(exception& e) {
379                 m->errorOut(e, "MakeBiomCommand", "execute");
380                 exit(1);
381         }
382 }
383 //**********************************************************************************************************************
384 int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
385         try {
386         map<string, string> variables; 
387         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
388         variables["[distance]"] = lookup[0]->getLabel();
389         string outputFileName = getOutputFileName("biom",variables);
390                 ofstream out;
391                 m->openOutputFile(outputFileName, out);
392                 outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
393
394         string mothurString = "mothur" + toString(m->getVersion());
395         time_t rawtime;
396         struct tm * timeinfo;
397         time ( &rawtime );
398         timeinfo = localtime ( &rawtime );
399         string dateString = asctime (timeinfo);
400         int pos = dateString.find('\n');
401         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
402         string spaces = "      ";
403         
404         //standard 
405         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
406         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
407         
408         int numBins = lookup[0]->getNumBins();
409         vector<string> metadata = getMetaData(lookup);  
410         
411         if (m->control_pressed) {  out.close(); return 0; }
412         
413         //get row info
414         /*"rows":[
415                 {"id":"GG_OTU_1", "metadata":null},
416                 {"id":"GG_OTU_2", "metadata":null},
417                 {"id":"GG_OTU_3", "metadata":null},
418                 {"id":"GG_OTU_4", "metadata":null},
419                 {"id":"GG_OTU_5", "metadata":null}
420                 ],*/
421         out << spaces + "\"rows\":[\n";
422         string rowFront = spaces + spaces + "{\"id\":\"";
423         string rowBack = "\", \"metadata\":";
424         for (int i = 0; i < numBins-1; i++) {
425             if (m->control_pressed) {  out.close(); return 0; }
426             out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n";
427         }
428         out << rowFront << m->currentBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n";
429         
430         //get column info
431         /*"columns": [
432                     {"id":"Sample1", "metadata":null},
433                     {"id":"Sample2", "metadata":null},
434                     {"id":"Sample3", "metadata":null},
435                     {"id":"Sample4", "metadata":null},
436                     {"id":"Sample5", "metadata":null},
437                     {"id":"Sample6", "metadata":null}
438                     ],*/
439         
440         string colBack = "\", \"metadata\":";
441         out << spaces + "\"columns\":[\n";
442         for (int i = 0; i < lookup.size()-1; i++) {
443             if (m->control_pressed) {  out.close(); return 0; }
444             out << rowFront << lookup[i]->getGroup() << colBack << sampleMetadata[i] << "},\n";
445         }
446         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
447         
448         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
449         out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
450         out << spaces + "\"data\":  [";
451         
452         vector<string> dataRows;
453         if (format == "sparse") {
454             /*"data":[[0,2,1],
455              [1,0,5],
456              [1,1,1],
457              [1,3,2],
458              [1,4,3],
459              [1,5,1],
460              [2,2,1],
461              [2,3,4],
462              [2,4,2],
463              [3,0,2],
464              [3,1,1],
465              [3,2,1],
466              [3,5,1],
467              [4,1,1],
468              [4,2,1]
469              ]*/
470             string output = "";
471             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
472                 
473                 if (m->control_pressed) { out.close(); return 0; }
474                 
475                 for (int j = 0; j < lookup.size(); j++) {
476                     string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(lookup[j]->getAbundance(i)) + "]";
477                     //only print non zero values
478                     if (lookup[j]->getAbundance(i) != 0) { dataRows.push_back(binInfo); }
479                 }
480             }
481         }else {
482             
483             /* "matrix_type": "dense",
484              "matrix_element_type": "int",
485              "shape": [5,6],
486              "data":  [[0,0,1,0,0,0],
487              [5,1,0,2,3,1],
488              [0,0,1,4,2,0],
489              [2,1,1,0,0,1],
490              [0,1,1,0,0,0]]*/
491             
492             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
493                 
494                 if (m->control_pressed) { out.close(); return 0; }
495                 
496                 string binInfo = "[";
497                 for (int j = 0; j < lookup.size()-1; j++) {
498                     binInfo += toString(lookup[j]->getAbundance(i)) + ",";
499                 }
500                 binInfo += toString(lookup[lookup.size()-1]->getAbundance(i)) + "]";
501                 dataRows.push_back(binInfo);
502             }
503         }
504         
505         for (int i = 0; i < dataRows.size()-1; i++) {
506             out << dataRows[i] << ",\n" + spaces  + spaces;
507         }
508         out << dataRows[dataRows.size()-1] << "]\n";
509         
510         out << "}\n";
511         out.close();
512         
513         return 0;
514     }
515         catch(exception& e) {
516                 m->errorOut(e, "MakeBiomCommand", "getBiom");
517                 exit(1);
518         }
519 }
520 //**********************************************************************************************************************
521 vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
522         try {
523         vector<string> metadata;
524         
525         if (contaxonomyfile == "") { for (int i = 0; i < lookup[0]->getNumBins(); i++) {  metadata.push_back("null");  } }
526         else {
527             
528             //read constaxonomy file storing in a map, otulabel -> taxonomy
529             //constaxonomy file will most likely contain more labels than the shared file, because sharedfile could have been subsampled.
530             ifstream in;
531             m->openInputFile(contaxonomyfile, in);
532             
533             //grab headers
534             m->getline(in); m->gobble(in);
535             
536             string otuLabel, tax;
537             int size;
538             vector<string> otuLabels;
539             vector<string> taxs;
540             while (!in.eof()) {
541                 
542                 if (m->control_pressed) { in.close(); return metadata; }
543                 
544                 in >> otuLabel >> size >> tax; m->gobble(in);
545                 
546                 otuLabels.push_back(otuLabel);
547                 taxs.push_back(tax);
548             }
549             in.close();
550             
551             //should the labels be Otu001 or PhyloType001
552             string firstBin = m->currentBinLabels[0];
553             string binTag = "Otu";
554             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
555             
556             //convert list file bin labels to shared file bin labels
557             //parse tax strings
558             //save in map
559             map<string, string> labelTaxMap;
560             string snumBins = toString(otuLabels.size());
561             for (int i = 0; i < otuLabels.size(); i++) {  
562                 
563                 if (m->control_pressed) { return metadata; }
564                 
565                 //if there is a bin label use it otherwise make one
566                 if (m->isContainingOnlyDigits(otuLabels[i])) {
567                     string binLabel = binTag;
568                     string sbinNumber = otuLabels[i];
569                     if (sbinNumber.length() < snumBins.length()) { 
570                         int diff = snumBins.length() - sbinNumber.length();
571                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
572                     }
573                     binLabel += sbinNumber;
574                     labelTaxMap[binLabel] = taxs[i];
575                 }else {  labelTaxMap[otuLabels[i]] = taxs[i]; }
576             }
577             
578             
579             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
580             
581             //traverse the binLabels forming the metadata strings and saving them
582             //make sure to sanity check
583             map<string, string>::iterator it;
584             for (int i = 0; i < m->currentBinLabels.size(); i++) {
585                 
586                 if (m->control_pressed) { return metadata; }
587                 
588                 it = labelTaxMap.find(m->currentBinLabels[i]);
589                 
590                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
591                 else {
592                     vector<string> bootstrapValues;
593                     string data = "{\"taxonomy\":[";
594             
595                     vector<string> scores;
596                     vector<string> taxonomies = parseTax(it->second, scores);
597                     
598                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
599                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
600                     
601                     //add bootstrap values if available
602                     if (scores[0] != "null") {
603                         data += ", \"bootstrap\":[";
604                         
605                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
606                         data += scores[scores.size()-1] + "]";
607
608                     }
609                     data += "}";
610                     
611                     metadata.push_back(data);
612                 }
613             }
614         }
615         
616         return metadata;
617         
618     }
619         catch(exception& e) {
620                 m->errorOut(e, "MakeBiomCommand", "getMetadata");
621                 exit(1);
622         }
623
624 }
625 //**********************************************************************************************************************
626 int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
627         try {
628         sampleMetadata.clear();
629         if (metadatafile == "") {  for (int i = 0; i < lookup.size(); i++) {  sampleMetadata.push_back("null");  } }
630         else {
631             ifstream in;
632             m->openInputFile(metadatafile, in);
633             
634             vector<string> groupNames, metadataLabels;
635             map<string, vector<string> > lines;
636             
637             string headerLine = m->getline(in); m->gobble(in);
638             vector<string> pieces = m->splitWhiteSpace(headerLine);
639             
640             //save names of columns you are reading
641             for (int i = 1; i < pieces.size(); i++) {
642                 metadataLabels.push_back(pieces[i]);
643             }
644             int count = metadataLabels.size();
645                         
646             vector<string> groups = m->getGroups();
647             
648             //read rest of file
649             while (!in.eof()) {
650                 
651                 if (m->control_pressed) { in.close(); return 0; }
652                 
653                 string group = "";
654                 in >> group; m->gobble(in);
655                 groupNames.push_back(group);
656                 
657                 string line = m->getline(in); m->gobble(in);
658                 vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
659         
660                 if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); }
661                 else {  if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; } }
662                 
663                 m->gobble(in);
664             }
665             in.close();
666             
667             map<string, vector<string> >::iterator it;
668             for (int i = 0; i < lookup.size(); i++) {
669                 
670                 if (m->control_pressed) { return 0; }
671                 
672                 it = lines.find(lookup[i]->getGroup());
673                 
674                 if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
675                 else {
676                     vector<string> values = it->second;
677                     
678                     string data = "{";
679                     for (int j = 0; j < metadataLabels.size()-1; j++) { 
680                         values[j] = m->removeQuotes(values[j]); 
681                         data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; 
682                     }
683                     values[metadataLabels.size()-1] = m->removeQuotes(values[metadataLabels.size()-1]);
684                     data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}";
685                     sampleMetadata.push_back(data);
686                 }
687             }
688         }
689         
690         return 0;
691         
692     }
693         catch(exception& e) {
694                 m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
695                 exit(1);
696         }
697     
698 }
699
700 /**************************************************************************************************/
701 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
702 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
703         try {
704                 
705                 string taxon;
706         vector<string> taxs;
707                 
708                 while (tax.find_first_of(';') != -1) {
709                         
710                         if (m->control_pressed) { return taxs; }
711                         
712                         //get taxon
713                         taxon = tax.substr(0,tax.find_first_of(';'));
714             
715                         int pos = taxon.find_last_of('(');
716                         if (pos != -1) {
717                                 //is it a number?
718                                 int pos2 = taxon.find_last_of(')');
719                                 if (pos2 != -1) {
720                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
721                                         if (m->isNumeric1(confidenceScore)) {
722                                                 taxon = taxon.substr(0, pos); //rip off confidence 
723                         scores.push_back(confidenceScore);
724                                         }else{ scores.push_back("null"); }
725                                 }
726                         }else{ scores.push_back("null"); }
727                         
728             //strip "" if they are there
729             pos = taxon.find("\"");
730             if (pos != string::npos) {
731                 string newTax = "";
732                 for (int k = 0; k < taxon.length(); k++) {
733                     if (taxon[k] != '\"') { newTax += taxon[k]; }
734                 }
735                 taxon = newTax;
736             }
737             
738             //look for bootstrap value
739                         taxs.push_back(taxon);
740             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
741                 }
742                 
743                 return taxs;
744         }
745         catch(exception& e) {
746                 m->errorOut(e, "MakeBiomCommand", "parseTax");
747                 exit(1);
748         }
749 }
750
751 //**********************************************************************************************************************
752
753
754