]> git.donarmstrong.com Git - mothur.git/blob - makebiomcommand.cpp
added count file to get.oturep, pre.cluster, screen.seqs, tree.shared. made remove...
[mothur.git] / makebiomcommand.cpp
1 //
2 //  makebiomcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/16/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makebiomcommand.h"
10 #include "sharedrabundvector.h"
11 #include "inputdata.h"
12
13 //taken from http://biom-format.org/documentation/biom_format.html
14 /* Minimal Sparse 
15  {
16  "id":null,
17  "format": "Biological Observation Matrix 0.9.1",
18  "format_url": "http://biom-format.org",
19  "type": "OTU table",
20  "generated_by": "QIIME revision 1.4.0-dev",
21  "date": "2011-12-19T19:00:00",
22  "rows":[
23  {"id":"GG_OTU_1", "metadata":null},
24  {"id":"GG_OTU_2", "metadata":null},
25  {"id":"GG_OTU_3", "metadata":null},
26  {"id":"GG_OTU_4", "metadata":null},
27  {"id":"GG_OTU_5", "metadata":null}
28  ],
29  "columns": [
30  {"id":"Sample1", "metadata":null},
31  {"id":"Sample2", "metadata":null},
32  {"id":"Sample3", "metadata":null},
33  {"id":"Sample4", "metadata":null},
34  {"id":"Sample5", "metadata":null},
35  {"id":"Sample6", "metadata":null}
36  ],
37  "matrix_type": "sparse",
38  "matrix_element_type": "int",
39  "shape": [5, 6],
40  "data":[[0,2,1],
41  [1,0,5],
42  [1,1,1],
43  [1,3,2],
44  [1,4,3],
45  [1,5,1],
46  [2,2,1],
47  [2,3,4],
48  [2,4,2],
49  [3,0,2],
50  [3,1,1],
51  [3,2,1],
52  [3,5,1],
53  [4,1,1],
54  [4,2,1]
55  ]
56  }
57  */
58 /* Minimal dense
59  {
60  "id":null,
61  "format": "Biological Observation Matrix 0.9.1",
62  "format_url": "http://biom-format.org",
63  "type": "OTU table",
64  "generated_by": "QIIME revision 1.4.0-dev",
65  "date": "2011-12-19T19:00:00",
66  "rows":[
67  {"id":"GG_OTU_1", "metadata":null},
68  {"id":"GG_OTU_2", "metadata":null},
69  {"id":"GG_OTU_3", "metadata":null},
70  {"id":"GG_OTU_4", "metadata":null},
71  {"id":"GG_OTU_5", "metadata":null}
72  ],
73  "columns": [
74  {"id":"Sample1", "metadata":null},
75  {"id":"Sample2", "metadata":null},
76  {"id":"Sample3", "metadata":null},
77  {"id":"Sample4", "metadata":null},
78  {"id":"Sample5", "metadata":null},
79  {"id":"Sample6", "metadata":null}
80  ],
81  "matrix_type": "dense",
82  "matrix_element_type": "int",
83  "shape": [5,6],
84  "data":  [[0,0,1,0,0,0],
85  [5,1,0,2,3,1],
86  [0,0,1,4,2,0],
87  [2,1,1,0,0,1],
88  [0,1,1,0,0,0]]
89  }
90  */
91 //**********************************************************************************************************************
92 vector<string> MakeBiomCommand::setParameters(){        
93         try {
94                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pshared);
95         CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pcontaxonomy);
96                 CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups);
97                 CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel);
98                 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir);
99                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir);
100         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "",false,false); parameters.push_back(pmatrixtype);
101
102                 vector<string> myArray;
103                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
104                 return myArray;
105         }
106         catch(exception& e) {
107                 m->errorOut(e, "MakeBiomCommand", "setParameters");
108                 exit(1);
109         }
110 }
111 //**********************************************************************************************************************
112 string MakeBiomCommand::getHelpString(){        
113         try {
114                 string helpString = "";
115                 helpString += "The make.biom command parameters are shared, contaxonomy, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n";
116                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
117                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
118                 helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
119         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled.\n";
120                 helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
121                 helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
122                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
123                 helpString += "The make.biom command outputs a .biom file.\n";
124                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
125                 return helpString;
126         }
127         catch(exception& e) {
128                 m->errorOut(e, "MakeBiomCommand", "getHelpString");
129                 exit(1);
130         }
131 }
132 //**********************************************************************************************************************
133 string MakeBiomCommand::getOutputFileNameTag(string type, string inputName=""){ 
134         try {
135         string outputFileName = "";
136                 map<string, vector<string> >::iterator it;
137         
138         //is this a type this command creates
139         it = outputTypes.find(type);
140         if (it == outputTypes.end()) {  m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); }
141         else {
142             if (type == "biom")             {   outputFileName =  "biom";       }
143             else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true;  }
144         }
145         return outputFileName;
146         }
147         catch(exception& e) {
148                 m->errorOut(e, "MakeBiomCommand", "getOutputFileNameTag");
149                 exit(1);
150         }
151 }
152
153 //**********************************************************************************************************************
154 MakeBiomCommand::MakeBiomCommand(){     
155         try {
156                 abort = true; calledHelp = true; 
157                 setParameters();
158                 vector<string> tempOutNames;
159                 outputTypes["biom"] = tempOutNames;
160         }
161         catch(exception& e) {
162                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
163                 exit(1);
164         }
165 }
166 //**********************************************************************************************************************
167 MakeBiomCommand::MakeBiomCommand(string option) {
168         try {
169                 abort = false; calledHelp = false;   
170                 allLines = 1;
171         
172                 //allow user to run help
173                 if(option == "help") { help(); abort = true; calledHelp = true; }
174                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
175                 
176                 else {
177                         vector<string> myArray = setParameters();
178                         
179                         OptionParser parser(option);
180                         map<string,string> parameters = parser.getParameters();
181                         map<string,string>::iterator it;
182                         
183                         ValidParameters validParameter;
184                         
185                         //check to make sure all parameters are valid for command
186                         for (it = parameters.begin(); it != parameters.end(); it++) { 
187                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
188                         }
189
190                         //initialize outputTypes
191                         vector<string> tempOutNames;
192                         outputTypes["biom"] = tempOutNames;
193                         
194                         //if the user changes the input directory command factory will send this info to us in the output parameter 
195                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
196                         if (inputDir == "not found"){   inputDir = "";          }
197                         else {
198                                 string path;
199                                 it = parameters.find("shared");
200                                 //user has given a template file
201                                 if(it != parameters.end()){ 
202                                         path = m->hasPath(it->second);
203                                         //if the user has not given a path then, add inputdir. else leave path alone.
204                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
205                                 }
206                 
207                 it = parameters.find("contaxonomy");
208                                 //user has given a template file
209                                 if(it != parameters.end()){ 
210                                         path = m->hasPath(it->second);
211                                         //if the user has not given a path then, add inputdir. else leave path alone.
212                                         if (path == "") {       parameters["contaxonomy"] = inputDir + it->second;              }
213                                 }
214                         }
215             
216                         //get shared file
217                         sharedfile = validParameter.validFile(parameters, "shared", true);
218                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
219                         else if (sharedfile == "not found") { 
220                                 //if there is a current shared file, use it
221                                 sharedfile = m->getSharedFile(); 
222                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
223                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
224                         }else { m->setSharedFile(sharedfile); }
225                         
226                         
227                         //if the user changes the output directory command factory will send this info to us in the output parameter 
228                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
229             
230             contaxonomyfile = validParameter.validFile(parameters, "contaxonomy", true);
231                         if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
232                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
233
234             
235                         //check for optional parameter and set defaults
236                         // ...at some point should added some additional type checking...
237                         label = validParameter.validFile(parameters, "label", false);                   
238                         if (label == "not found") { label = ""; }
239                         else { 
240                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
241                                 else { allLines = 1;  }
242                         }
243                         
244                         groups = validParameter.validFile(parameters, "groups", false);                 
245                         if (groups == "not found") { groups = ""; }
246                         else { 
247                                 m->splitAtDash(groups, Groups);
248                                 m->setGroups(Groups);
249                         }
250                         
251             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
252             
253                         format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
254                         
255                         if ((format != "sparse") && (format != "dense")) {
256                                 m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense."); m->mothurOutEndLine(); abort = true; 
257                         }
258                 }
259         
260         }
261         catch(exception& e) {
262                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
263                 exit(1);
264         }
265 }
266 //**********************************************************************************************************************
267
268 int MakeBiomCommand::execute(){
269         try {
270         
271                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
272             
273                 InputData input(sharedfile, "sharedfile");
274                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
275                 string lastLabel = lookup[0]->getLabel();
276         
277         //if user did not specify a label, then use first one
278         if ((contaxonomyfile != "") && (labels.size() == 0)) {
279             allLines = 0;
280             labels.insert(lastLabel);
281         }
282                 
283                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
284                 set<string> processedLabels;
285                 set<string> userLabels = labels;
286         
287                 //as long as you are not at the end of the file or done wih the lines you want
288                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
289                         
290                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
291             
292                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
293                 
294                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
295                                 getBiom(lookup);
296                                 
297                                 processedLabels.insert(lookup[0]->getLabel());
298                                 userLabels.erase(lookup[0]->getLabel());
299                         }
300                         
301                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
302                                 string saveLabel = lookup[0]->getLabel();
303                 
304                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
305                                 lookup = input.getSharedRAbundVectors(lastLabel);
306                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
307                                 
308                                 getBiom(lookup);
309                                 
310                                 processedLabels.insert(lookup[0]->getLabel());
311                                 userLabels.erase(lookup[0]->getLabel());
312                                 
313                                 //restore real lastlabel to save below
314                                 lookup[0]->setLabel(saveLabel);
315                         }
316                         
317                         lastLabel = lookup[0]->getLabel();
318             
319                         //prevent memory leak and get next set
320                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
321                         lookup = input.getSharedRAbundVectors();                                
322                 }
323                 
324         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
325         
326                 //output error messages about any remaining user labels
327                 set<string>::iterator it;
328                 bool needToRun = false;
329                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
330                         m->mothurOut("Your file does not include the label " + *it); 
331                         if (processedLabels.count(lastLabel) != 1) {
332                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
333                                 needToRun = true;
334                         }else {
335                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
336                         }
337                 }
338         
339                 //run last label if you need to
340                 if (needToRun == true)  {
341                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
342                         lookup = input.getSharedRAbundVectors(lastLabel);
343                         
344                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
345             getBiom(lookup);
346                         
347                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
348                 }
349                 
350         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
351                 
352         //set sabund file as new current sabundfile
353         string current = "";
354                 itTypes = outputTypes.find("biom");
355                 if (itTypes != outputTypes.end()) {
356                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setBiomFile(current); }
357                 }
358
359         
360                 m->mothurOutEndLine();
361                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
362                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
363                 m->mothurOutEndLine();
364                 
365                 return 0;
366         }
367         catch(exception& e) {
368                 m->errorOut(e, "MakeBiomCommand", "execute");
369                 exit(1);
370         }
371 }
372 //**********************************************************************************************************************
373 int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
374         try {
375         
376         string outputFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + lookup[0]->getLabel() + "." + getOutputFileNameTag("biom");
377                 ofstream out;
378                 m->openOutputFile(outputFileName, out);
379                 outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
380
381         string mothurString = "mothur" + toString(m->getVersion());
382         time_t rawtime;
383         struct tm * timeinfo;
384         time ( &rawtime );
385         timeinfo = localtime ( &rawtime );
386         string dateString = asctime (timeinfo);
387         int pos = dateString.find('\n');
388         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
389         string spaces = "      ";
390         
391         //standard 
392         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
393         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
394         
395         vector<string> metadata = getMetaData(lookup);  
396         
397         if (m->control_pressed) {  out.close(); return 0; }
398         
399         //get row info
400         /*"rows":[
401                 {"id":"GG_OTU_1", "metadata":null},
402                 {"id":"GG_OTU_2", "metadata":null},
403                 {"id":"GG_OTU_3", "metadata":null},
404                 {"id":"GG_OTU_4", "metadata":null},
405                 {"id":"GG_OTU_5", "metadata":null}
406                 ],*/
407         out << spaces + "\"rows\":[\n";
408         string rowFront = spaces + spaces + "{\"id\":\"";
409         string rowBack = "\", \"metadata\":";
410         for (int i = 0; i < m->currentBinLabels.size()-1; i++) {
411             if (m->control_pressed) {  out.close(); return 0; }
412             out << rowFront << m->currentBinLabels[i] << rowBack << metadata[i] << "},\n";
413         }
414         out << rowFront << m->currentBinLabels[(m->currentBinLabels.size()-1)] << rowBack << metadata[(m->currentBinLabels.size()-1)] << "}\n" + spaces + "],\n";
415         
416         //get column info
417         /*"columns": [
418                     {"id":"Sample1", "metadata":null},
419                     {"id":"Sample2", "metadata":null},
420                     {"id":"Sample3", "metadata":null},
421                     {"id":"Sample4", "metadata":null},
422                     {"id":"Sample5", "metadata":null},
423                     {"id":"Sample6", "metadata":null}
424                     ],*/
425         
426         string colBack = "\", \"metadata\":null}";
427         out << spaces + "\"columns\":[\n";
428         for (int i = 0; i < lookup.size()-1; i++) {
429             if (m->control_pressed) {  out.close(); return 0; }
430             out << rowFront << lookup[i]->getGroup() << colBack << ",\n";
431         }
432         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << "\n" + spaces + "],\n";
433         
434         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
435         out <<  spaces + "\"shape\": [" << m->currentBinLabels.size() << "," << lookup.size() << "],\n";
436         out << spaces + "\"data\":  [";
437         
438         vector<string> dataRows;
439         if (format == "sparse") {
440             /*"data":[[0,2,1],
441              [1,0,5],
442              [1,1,1],
443              [1,3,2],
444              [1,4,3],
445              [1,5,1],
446              [2,2,1],
447              [2,3,4],
448              [2,4,2],
449              [3,0,2],
450              [3,1,1],
451              [3,2,1],
452              [3,5,1],
453              [4,1,1],
454              [4,2,1]
455              ]*/
456             string output = "";
457             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
458                 
459                 if (m->control_pressed) { out.close(); return 0; }
460                 
461                 for (int j = 0; j < lookup.size(); j++) {
462                     string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(lookup[j]->getAbundance(i)) + "]";
463                     //only print non zero values
464                     if (lookup[j]->getAbundance(i) != 0) { dataRows.push_back(binInfo); }
465                 }
466             }
467         }else {
468             
469             /* "matrix_type": "dense",
470              "matrix_element_type": "int",
471              "shape": [5,6],
472              "data":  [[0,0,1,0,0,0],
473              [5,1,0,2,3,1],
474              [0,0,1,4,2,0],
475              [2,1,1,0,0,1],
476              [0,1,1,0,0,0]]*/
477             
478             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
479                 
480                 if (m->control_pressed) { out.close(); return 0; }
481                 
482                 string binInfo = "[";
483                 for (int j = 0; j < lookup.size()-1; j++) {
484                     binInfo += toString(lookup[j]->getAbundance(i)) + ",";
485                 }
486                 binInfo += toString(lookup[lookup.size()-1]->getAbundance(i)) + "]";
487                 dataRows.push_back(binInfo);
488             }
489         }
490         
491         for (int i = 0; i < dataRows.size()-1; i++) {
492             out << dataRows[i] << ",\n" + spaces  + spaces;
493         }
494         out << dataRows[dataRows.size()-1] << "]\n";
495         
496         out << "}\n";
497         out.close();
498         
499         return 0;
500     }
501         catch(exception& e) {
502                 m->errorOut(e, "MakeBiomCommand", "getBiom");
503                 exit(1);
504         }
505 }
506 //**********************************************************************************************************************
507 vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup){
508         try {
509         vector<string> metadata;
510         
511         if (contaxonomyfile == "") { for (int i = 0; i < lookup[0]->getNumBins(); i++) {  metadata.push_back("null");  } }
512         else {
513             
514             //read constaxonomy file storing in a map, otulabel -> taxonomy
515             //constaxonomy file will most likely contain more labels than the shared file, because sharedfile could have been subsampled.
516             ifstream in;
517             m->openInputFile(contaxonomyfile, in);
518             
519             //grab headers
520             m->getline(in); m->gobble(in);
521             
522             string otuLabel, tax;
523             int size;
524             vector<string> otuLabels;
525             vector<string> taxs;
526             while (!in.eof()) {
527                 
528                 if (m->control_pressed) { in.close(); return metadata; }
529                 
530                 in >> otuLabel >> size >> tax; m->gobble(in);
531                 
532                 otuLabels.push_back(otuLabel);
533                 taxs.push_back(tax);
534             }
535             in.close();
536             
537             //should the labels be Otu001 or PhyloType001
538             string firstBin = m->currentBinLabels[0];
539             string binTag = "Otu";
540             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
541             
542             //convert list file bin labels to shared file bin labels
543             //parse tax strings
544             //save in map
545             map<string, string> labelTaxMap;
546             string snumBins = toString(otuLabels.size());
547             for (int i = 0; i < otuLabels.size(); i++) {  
548                 
549                 if (m->control_pressed) { return metadata; }
550                 
551                 //if there is a bin label use it otherwise make one
552                 if (m->isContainingOnlyDigits(otuLabels[i])) {
553                     string binLabel = binTag;
554                     string sbinNumber = otuLabels[i];
555                     if (sbinNumber.length() < snumBins.length()) { 
556                         int diff = snumBins.length() - sbinNumber.length();
557                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
558                     }
559                     binLabel += sbinNumber;
560                     labelTaxMap[binLabel] = taxs[i];
561                 }else {  labelTaxMap[otuLabels[i]] = taxs[i]; }
562             }
563             
564             
565             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
566             
567             //traverse the binLabels forming the metadata strings and saving them
568             //make sure to sanity check
569             map<string, string>::iterator it;
570             for (int i = 0; i < m->currentBinLabels.size(); i++) {
571                 
572                 if (m->control_pressed) { return metadata; }
573                 
574                 it = labelTaxMap.find(m->currentBinLabels[i]);
575                 
576                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentBinLabels[i] + ".\n"); m->control_pressed = true; }
577                 else {
578                     vector<string> bootstrapValues;
579                     string data = "{\"taxonomy\":[";
580             
581                     vector<string> scores;
582                     vector<string> taxonomies = parseTax(it->second, scores);
583                     
584                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
585                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
586                     
587                     //add bootstrap values if available
588                     if (scores[0] != "null") {
589                         data += ", \"bootstrap\":[";
590                         
591                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
592                         data += scores[scores.size()-1] + "]";
593
594                     }
595                     data += "}";
596                     
597                     metadata.push_back(data);
598                 }
599             }
600         }
601         
602         return metadata;
603         
604     }
605         catch(exception& e) {
606                 m->errorOut(e, "MakeBiomCommand", "getMetadata");
607                 exit(1);
608         }
609
610 }
611 /**************************************************************************************************/
612 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
613 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
614         try {
615                 
616                 string taxon;
617         vector<string> taxs;
618                 
619                 while (tax.find_first_of(';') != -1) {
620                         
621                         if (m->control_pressed) { return taxs; }
622                         
623                         //get taxon
624                         taxon = tax.substr(0,tax.find_first_of(';'));
625             
626                         int pos = taxon.find_last_of('(');
627                         if (pos != -1) {
628                                 //is it a number?
629                                 int pos2 = taxon.find_last_of(')');
630                                 if (pos2 != -1) {
631                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
632                                         if (m->isNumeric1(confidenceScore)) {
633                                                 taxon = taxon.substr(0, pos); //rip off confidence 
634                         scores.push_back(confidenceScore);
635                                         }else{ scores.push_back("null"); }
636                                 }
637                         }else{ scores.push_back("null"); }
638                         
639             //strip "" if they are there
640             pos = taxon.find("\"");
641             if (pos != string::npos) {
642                 string newTax = "";
643                 for (int k = 0; k < taxon.length(); k++) {
644                     if (taxon[k] != '\"') { newTax += taxon[k]; }
645                 }
646                 taxon = newTax;
647             }
648             
649             //look for bootstrap value
650                         taxs.push_back(taxon);
651             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
652                 }
653                 
654                 return taxs;
655         }
656         catch(exception& e) {
657                 m->errorOut(e, "MakeBiomCommand", "parseTax");
658                 exit(1);
659         }
660 }
661
662 //**********************************************************************************************************************
663
664
665