]> git.donarmstrong.com Git - mothur.git/blob - makebiomcommand.cpp
adding labels to list file.
[mothur.git] / makebiomcommand.cpp
1 //
2 //  makebiomcommand.cpp
3 //  Mothur
4 //
5 //  Created by Sarah Westcott on 4/16/12.
6 //  Copyright (c) 2012 Schloss Lab. All rights reserved.
7 //
8
9 #include "makebiomcommand.h"
10 #include "sharedrabundvector.h"
11 #include "inputdata.h"
12 #include "sharedutilities.h"
13
14 //taken from http://biom-format.org/documentation/biom_format.html
15 /* Minimal Sparse 
16  {
17  "id":null,
18  "format": "Biological Observation Matrix 0.9.1",
19  "format_url": "http://biom-format.org",
20  "type": "OTU table",
21  "generated_by": "QIIME revision 1.4.0-dev",
22  "date": "2011-12-19T19:00:00",
23  "rows":[
24  {"id":"GG_OTU_1", "metadata":null},
25  {"id":"GG_OTU_2", "metadata":null},
26  {"id":"GG_OTU_3", "metadata":null},
27  {"id":"GG_OTU_4", "metadata":null},
28  {"id":"GG_OTU_5", "metadata":null}
29  ],
30  "columns": [
31  {"id":"Sample1", "metadata":null},
32  {"id":"Sample2", "metadata":null},
33  {"id":"Sample3", "metadata":null},
34  {"id":"Sample4", "metadata":null},
35  {"id":"Sample5", "metadata":null},
36  {"id":"Sample6", "metadata":null}
37  ],
38  "matrix_type": "sparse",
39  "matrix_element_type": "int",
40  "shape": [5, 6],
41  "data":[[0,2,1],
42  [1,0,5],
43  [1,1,1],
44  [1,3,2],
45  [1,4,3],
46  [1,5,1],
47  [2,2,1],
48  [2,3,4],
49  [2,4,2],
50  [3,0,2],
51  [3,1,1],
52  [3,2,1],
53  [3,5,1],
54  [4,1,1],
55  [4,2,1]
56  ]
57  }
58  */
59 /* Minimal dense
60  {
61  "id":null,
62  "format": "Biological Observation Matrix 0.9.1",
63  "format_url": "http://biom-format.org",
64  "type": "OTU table",
65  "generated_by": "QIIME revision 1.4.0-dev",
66  "date": "2011-12-19T19:00:00",
67  "rows":[
68  {"id":"GG_OTU_1", "metadata":null},
69  {"id":"GG_OTU_2", "metadata":null},
70  {"id":"GG_OTU_3", "metadata":null},
71  {"id":"GG_OTU_4", "metadata":null},
72  {"id":"GG_OTU_5", "metadata":null}
73  ],
74  "columns": [
75  {"id":"Sample1", "metadata":null},
76  {"id":"Sample2", "metadata":null},
77  {"id":"Sample3", "metadata":null},
78  {"id":"Sample4", "metadata":null},
79  {"id":"Sample5", "metadata":null},
80  {"id":"Sample6", "metadata":null}
81  ],
82  "matrix_type": "dense",
83  "matrix_element_type": "int",
84  "shape": [5,6],
85  "data":  [[0,0,1,0,0,0],
86  [5,1,0,2,3,1],
87  [0,0,1,4,2,0],
88  [2,1,1,0,0,1],
89  [0,1,1,0,0,0]]
90  }
91  */
92 //**********************************************************************************************************************
93 vector<string> MakeBiomCommand::setParameters(){        
94         try {
95                 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","biom",false,true,true); parameters.push_back(pshared);
96         CommandParameter pcontaxonomy("constaxonomy", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pcontaxonomy);
97         //CommandParameter preference("referencetax", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(preference);
98         CommandParameter pmetadata("metadata", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pmetadata);
99                 CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
100                 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
101         //CommandParameter ppicrust("picrust", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(ppicrust);
102         CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
103                 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
104         CommandParameter pmatrixtype("matrixtype", "Multiple", "sparse-dense", "sparse", "", "", "","",false,false); parameters.push_back(pmatrixtype);
105
106                 vector<string> myArray;
107                 for (int i = 0; i < parameters.size(); i++) {   myArray.push_back(parameters[i].name);          }
108                 return myArray;
109         }
110         catch(exception& e) {
111                 m->errorOut(e, "MakeBiomCommand", "setParameters");
112                 exit(1);
113         }
114 }
115 //**********************************************************************************************************************
116 string MakeBiomCommand::getHelpString(){        
117         try {
118                 string helpString = "";
119                 helpString += "The make.biom command parameters are shared, contaxonomy, metadata, groups, matrixtype and label.  shared is required, unless you have a valid current file.\n"; //, picrust and referencetax
120                 helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included. The group names are separated by dashes.\n";
121                 helpString += "The label parameter allows you to select what distance levels you would like, and are also separated by dashes.\n";
122                 helpString += "The matrixtype parameter allows you to select what type you would like to make. Choices are sparse and dense, default is sparse.\n";
123         helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile). Be SURE that the you are the constaxonomy file distance matches the shared file distance.  ie, for *.0.03.cons.taxonomy set label=0.03. Mothur is smart enough to handle shared files that have been subsampled. It is used to assign taxonomy information to the metadata of rows.\n";
124         helpString += "The metadata parameter is used to provide experimental parameters to the columns.  Things like 'sample1 gut human_gut'. \n";
125         //helpString += "The picrust parameter is used to indicate the biom file is for input to picrust.  NOTE: Picrust requires a greengenes taxonomy. \n";
126         //helpString += "The referencetax parameter is used with the picrust parameter.  Picrust requires the name of the reference taxonomy sequence to be in the biom file. \n";
127                 helpString += "The make.biom command should be in the following format: make.biom(shared=yourShared, groups=yourGroups, label=yourLabels).\n";
128                 helpString += "Example make.biom(shared=abrecovery.an.shared, groups=A-B-C).\n";
129                 helpString += "The default value for groups is all the groups in your groupfile, and all labels in your inputfile will be used.\n";
130                 helpString += "The make.biom command outputs a .biom file.\n";
131                 helpString += "Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n";
132                 return helpString;
133         }
134         catch(exception& e) {
135                 m->errorOut(e, "MakeBiomCommand", "getHelpString");
136                 exit(1);
137         }
138 }
139 //**********************************************************************************************************************
140 string MakeBiomCommand::getOutputPattern(string type) {
141     try {
142         string pattern = "";
143         
144         if (type == "biom") {  pattern = "[filename],[distance],biom"; } 
145         else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true;  }
146         
147         return pattern;
148     }
149     catch(exception& e) {
150         m->errorOut(e, "MakeBiomCommand", "getOutputPattern");
151         exit(1);
152     }
153 }
154
155 //**********************************************************************************************************************
156 MakeBiomCommand::MakeBiomCommand(){     
157         try {
158                 abort = true; calledHelp = true; 
159                 setParameters();
160                 vector<string> tempOutNames;
161                 outputTypes["biom"] = tempOutNames;
162         }
163         catch(exception& e) {
164                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
165                 exit(1);
166         }
167 }
168 //**********************************************************************************************************************
169 MakeBiomCommand::MakeBiomCommand(string option) {
170         try {
171                 abort = false; calledHelp = false;   
172                 allLines = 1;
173         
174                 //allow user to run help
175                 if(option == "help") { help(); abort = true; calledHelp = true; }
176                 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
177                 
178                 else {
179                         vector<string> myArray = setParameters();
180                         
181                         OptionParser parser(option);
182                         map<string,string> parameters = parser.getParameters();
183                         map<string,string>::iterator it;
184                         
185                         ValidParameters validParameter;
186                         
187                         //check to make sure all parameters are valid for command
188                         for (it = parameters.begin(); it != parameters.end(); it++) { 
189                                 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) {  abort = true;  }
190                         }
191
192                         //initialize outputTypes
193                         vector<string> tempOutNames;
194                         outputTypes["biom"] = tempOutNames;
195                         
196                         //if the user changes the input directory command factory will send this info to us in the output parameter 
197                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
198                         if (inputDir == "not found"){   inputDir = "";          }
199                         else {
200                                 string path;
201                                 it = parameters.find("shared");
202                                 //user has given a template file
203                                 if(it != parameters.end()){ 
204                                         path = m->hasPath(it->second);
205                                         //if the user has not given a path then, add inputdir. else leave path alone.
206                                         if (path == "") {       parameters["shared"] = inputDir + it->second;           }
207                                 }
208                 
209                 it = parameters.find("constaxonomy");
210                                 //user has given a template file
211                                 if(it != parameters.end()){ 
212                                         path = m->hasPath(it->second);
213                                         //if the user has not given a path then, add inputdir. else leave path alone.
214                                         if (path == "") {       parameters["constaxonomy"] = inputDir + it->second;             }
215                                 }
216                 
217                 it = parameters.find("referencetax");
218                                 //user has given a template file
219                                 if(it != parameters.end()){
220                                         path = m->hasPath(it->second);
221                                         //if the user has not given a path then, add inputdir. else leave path alone.
222                                         if (path == "") {       parameters["referencetax"] = inputDir + it->second;             }
223                                 }
224                 
225                 it = parameters.find("metadata");
226                                 //user has given a template file
227                                 if(it != parameters.end()){ 
228                                         path = m->hasPath(it->second);
229                                         //if the user has not given a path then, add inputdir. else leave path alone.
230                                         if (path == "") {       parameters["metadata"] = inputDir + it->second;         }
231                                 }
232                         }
233             
234                         //get shared file
235                         sharedfile = validParameter.validFile(parameters, "shared", true);
236                         if (sharedfile == "not open") { sharedfile = ""; abort = true; }        
237                         else if (sharedfile == "not found") { 
238                                 //if there is a current shared file, use it
239                                 sharedfile = m->getSharedFile(); 
240                                 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
241                                 else {  m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
242                         }else { m->setSharedFile(sharedfile); }
243                         
244                         
245                         //if the user changes the output directory command factory will send this info to us in the output parameter 
246                         outputDir = validParameter.validFile(parameters, "outputdir", false);           if (outputDir == "not found"){  outputDir = m->hasPath(sharedfile);             }
247             
248             contaxonomyfile = validParameter.validFile(parameters, "constaxonomy", true);
249                         if (contaxonomyfile == "not found") {  contaxonomyfile = "";  }
250                         else if (contaxonomyfile == "not open") { contaxonomyfile = ""; abort = true; }
251             
252             //referenceTax = validParameter.validFile(parameters, "referencetax", true);
253                         //if (referenceTax == "not found") {  referenceTax = "";  }
254                         //else if (referenceTax == "not open") { referenceTax = ""; abort = true; }
255
256             metadatafile = validParameter.validFile(parameters, "metadata", true);
257                         if (metadatafile == "not found") {  metadatafile = "";  }
258                         else if (metadatafile == "not open") { metadatafile = ""; abort = true; }
259             
260                         //check for optional parameter and set defaults
261                         // ...at some point should added some additional type checking...
262                         label = validParameter.validFile(parameters, "label", false);                   
263                         if (label == "not found") { label = ""; }
264                         else { 
265                                 if(label != "all") {  m->splitAtDash(label, labels);  allLines = 0;  }
266                                 else { allLines = 1;  }
267                         }
268             
269             //string temp = validParameter.validFile(parameters, "picrust", false);                     if (temp == "not found"){       temp = "f";                             }
270                         //picrust = m->isTrue(temp);
271             //if (picrust && ((contaxonomyfile == "") || (referenceTax == ""))) {
272                 //m->mothurOut("[ERROR]: the picrust parameter requires a consensus taxonomy with greengenes taxonomy the reference."); m->mothurOutEndLine(); abort = true;
273            //}
274             picrust=false;
275                         
276                         groups = validParameter.validFile(parameters, "groups", false);                 
277                         if (groups == "not found") { groups = ""; }
278                         else { 
279                                 m->splitAtDash(groups, Groups);
280                                 m->setGroups(Groups);
281                         }
282                         
283             if ((contaxonomyfile != "") && (labels.size() > 1)) { m->mothurOut("[ERROR]: the contaxonomy parameter cannot be used with multiple labels."); m->mothurOutEndLine(); abort = true; }
284             
285                         format = validParameter.validFile(parameters, "matrixtype", false);                             if (format == "not found") { format = "sparse"; }
286                         
287                         if ((format != "sparse") && (format != "dense")) {
288                                 m->mothurOut(format + " is not a valid option for the matrixtype parameter. Options are sparse and dense."); m->mothurOutEndLine(); abort = true; 
289                         }
290                 }
291         
292         }
293         catch(exception& e) {
294                 m->errorOut(e, "MakeBiomCommand", "MakeBiomCommand");
295                 exit(1);
296         }
297 }
298 //**********************************************************************************************************************
299
300 int MakeBiomCommand::execute(){
301         try {
302         
303                 if (abort == true) { if (calledHelp) { return 0; }  return 2;   }
304             
305                 InputData input(sharedfile, "sharedfile");
306                 vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
307                 string lastLabel = lookup[0]->getLabel();
308         
309         //if user did not specify a label, then use first one
310         if ((contaxonomyfile != "") && (labels.size() == 0)) {
311             allLines = 0;
312             labels.insert(lastLabel);
313         }
314                 
315         getSampleMetaData(lookup);
316         
317                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
318                 set<string> processedLabels;
319                 set<string> userLabels = labels;
320         
321                 //as long as you are not at the end of the file or done wih the lines you want
322                 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
323                         
324                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); } for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  return 0; }
325             
326                         if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){                  
327                 
328                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
329                                 getBiom(lookup);
330                                 
331                                 processedLabels.insert(lookup[0]->getLabel());
332                                 userLabels.erase(lookup[0]->getLabel());
333                         }
334                         
335                         if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
336                                 string saveLabel = lookup[0]->getLabel();
337                 
338                                 for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }  
339                                 lookup = input.getSharedRAbundVectors(lastLabel);
340                                 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
341                                 
342                                 getBiom(lookup);
343                                 
344                                 processedLabels.insert(lookup[0]->getLabel());
345                                 userLabels.erase(lookup[0]->getLabel());
346                                 
347                                 //restore real lastlabel to save below
348                                 lookup[0]->setLabel(saveLabel);
349                         }
350                         
351                         lastLabel = lookup[0]->getLabel();
352             
353                         //prevent memory leak and get next set
354                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i]; lookup[i] = NULL; }
355                         lookup = input.getSharedRAbundVectors();                                
356                 }
357                 
358         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
359         
360                 //output error messages about any remaining user labels
361                 set<string>::iterator it;
362                 bool needToRun = false;
363                 for (it = userLabels.begin(); it != userLabels.end(); it++) {  
364                         m->mothurOut("Your file does not include the label " + *it); 
365                         if (processedLabels.count(lastLabel) != 1) {
366                                 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
367                                 needToRun = true;
368                         }else {
369                                 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
370                         }
371                 }
372         
373                 //run last label if you need to
374                 if (needToRun == true)  {
375                         for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }  
376                         lookup = input.getSharedRAbundVectors(lastLabel);
377                         
378                         m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
379             getBiom(lookup);
380                         
381                         for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  }
382                 }
383                 
384         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        m->mothurRemove(outputNames[i]); }  return 0; }     
385                 
386         //set sabund file as new current sabundfile
387         string current = "";
388                 itTypes = outputTypes.find("biom");
389                 if (itTypes != outputTypes.end()) {
390                         if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setBiomFile(current); }
391                 }
392
393         
394                 m->mothurOutEndLine();
395                 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
396                 for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
397                 m->mothurOutEndLine();
398                 
399                 return 0;
400         }
401         catch(exception& e) {
402                 m->errorOut(e, "MakeBiomCommand", "execute");
403                 exit(1);
404         }
405 }
406 //**********************************************************************************************************************
407 int MakeBiomCommand::getBiom(vector<SharedRAbundVector*>& lookup){
408         try {
409         map<string, string> variables; 
410         variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
411         variables["[distance]"] = lookup[0]->getLabel();
412         string outputFileName = getOutputFileName("biom",variables);
413                 ofstream out;
414                 m->openOutputFile(outputFileName, out);
415                 outputNames.push_back(outputFileName); outputTypes["biom"].push_back(outputFileName);
416
417         string mothurString = "mothur" + toString(m->getVersion());
418         time_t rawtime;
419         struct tm * timeinfo;
420         time ( &rawtime );
421         timeinfo = localtime ( &rawtime );
422         string dateString = asctime (timeinfo);
423         int pos = dateString.find('\n');
424         if (pos != string::npos) { dateString = dateString.substr(0, pos);}
425         string spaces = "      ";
426         
427         //standard 
428         out << "{\n" + spaces + "\"id\":\"" + sharedfile + "-" + lookup[0]->getLabel() + "\",\n" + spaces + "\"format\": \"Biological Observation Matrix 0.9.1\",\n" + spaces + "\"format_url\": \"http://biom-format.org\",\n";
429         out << spaces + "\"type\": \"OTU table\",\n" + spaces + "\"generated_by\": \"" << mothurString << "\",\n" + spaces + "\"date\": \"" << dateString << "\",\n";
430         
431         int numBins = lookup[0]->getNumBins();
432         vector<string> picrustLabels;
433         vector<string> metadata = getMetaData(lookup, picrustLabels);
434         
435         if (m->control_pressed) {  out.close(); return 0; }
436         
437         //get row info
438         /*"rows":[
439                 {"id":"GG_OTU_1", "metadata":null},
440                 {"id":"GG_OTU_2", "metadata":null},
441                 {"id":"GG_OTU_3", "metadata":null},
442                 {"id":"GG_OTU_4", "metadata":null},
443                 {"id":"GG_OTU_5", "metadata":null}
444                 ],*/
445         out << spaces + "\"rows\":[\n";
446         string rowFront = spaces + spaces + "{\"id\":\"";
447         string rowBack = "\", \"metadata\":";
448         for (int i = 0; i < numBins-1; i++) {
449             if (m->control_pressed) {  out.close(); return 0; }
450             if (!picrust) { out << rowFront << m->currentSharedBinLabels[i] << rowBack << metadata[i] << "},\n"; }
451             else {  out << rowFront << picrustLabels[i] << rowBack << metadata[i] << "},\n"; }
452         }
453         if (!picrust) {  out << rowFront << m->currentSharedBinLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
454         else {  out << rowFront << picrustLabels[(numBins-1)] << rowBack << metadata[(numBins-1)] << "}\n" + spaces + "],\n"; }
455         //get column info
456         /*"columns": [
457                     {"id":"Sample1", "metadata":null},
458                     {"id":"Sample2", "metadata":null},
459                     {"id":"Sample3", "metadata":null},
460                     {"id":"Sample4", "metadata":null},
461                     {"id":"Sample5", "metadata":null},
462                     {"id":"Sample6", "metadata":null}
463                     ],*/
464         
465         string colBack = "\", \"metadata\":";
466         out << spaces + "\"columns\":[\n";
467         for (int i = 0; i < lookup.size()-1; i++) {
468             if (m->control_pressed) {  out.close(); return 0; }
469             out << rowFront << lookup[i]->getGroup() << colBack << sampleMetadata[i] << "},\n";
470         }
471         out << rowFront << lookup[(lookup.size()-1)]->getGroup() << colBack << sampleMetadata[lookup.size()-1] << "}\n" + spaces + "],\n";
472         
473         out << spaces + "\"matrix_type\": \"" << format << "\",\n" + spaces + "\"matrix_element_type\": \"int\",\n";
474         out <<  spaces + "\"shape\": [" << numBins << "," << lookup.size() << "],\n";
475         out << spaces + "\"data\":  [";
476         
477         vector<string> dataRows;
478         if (format == "sparse") {
479             /*"data":[[0,2,1],
480              [1,0,5],
481              [1,1,1],
482              [1,3,2],
483              [1,4,3],
484              [1,5,1],
485              [2,2,1],
486              [2,3,4],
487              [2,4,2],
488              [3,0,2],
489              [3,1,1],
490              [3,2,1],
491              [3,5,1],
492              [4,1,1],
493              [4,2,1]
494              ]*/
495             string output = "";
496             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
497                 
498                 if (m->control_pressed) { out.close(); return 0; }
499                 
500                 for (int j = 0; j < lookup.size(); j++) {
501                     string binInfo = "[" + toString(i) + "," + toString(j) + "," + toString(lookup[j]->getAbundance(i)) + "]";
502                     //only print non zero values
503                     if (lookup[j]->getAbundance(i) != 0) { dataRows.push_back(binInfo); }
504                 }
505             }
506         }else {
507             
508             /* "matrix_type": "dense",
509              "matrix_element_type": "int",
510              "shape": [5,6],
511              "data":  [[0,0,1,0,0,0],
512              [5,1,0,2,3,1],
513              [0,0,1,4,2,0],
514              [2,1,1,0,0,1],
515              [0,1,1,0,0,0]]*/
516             
517             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
518                 
519                 if (m->control_pressed) { out.close(); return 0; }
520                 
521                 string binInfo = "[";
522                 for (int j = 0; j < lookup.size()-1; j++) {
523                     binInfo += toString(lookup[j]->getAbundance(i)) + ",";
524                 }
525                 binInfo += toString(lookup[lookup.size()-1]->getAbundance(i)) + "]";
526                 dataRows.push_back(binInfo);
527             }
528         }
529         
530         for (int i = 0; i < dataRows.size()-1; i++) {
531             out << dataRows[i] << ",\n" + spaces  + spaces;
532         }
533         out << dataRows[dataRows.size()-1] << "]\n";
534         
535         out << "}\n";
536         out.close();
537         
538         return 0;
539     }
540         catch(exception& e) {
541                 m->errorOut(e, "MakeBiomCommand", "getBiom");
542                 exit(1);
543         }
544 }
545 //**********************************************************************************************************************
546 vector<string> MakeBiomCommand::getMetaData(vector<SharedRAbundVector*>& lookup, vector<string>& picrustLabels){
547         try {
548         vector<string> metadata;
549         
550         if (contaxonomyfile == "") { for (int i = 0; i < lookup[0]->getNumBins(); i++) {  metadata.push_back("null");  } }
551         else {
552             
553             //read constaxonomy file storing in a map, otulabel -> taxonomy
554             //constaxonomy file will most likely contain more labels than the shared file, because sharedfile could have been subsampled.
555             ifstream in;
556             m->openInputFile(contaxonomyfile, in);
557             
558             //grab headers
559             m->getline(in); m->gobble(in);
560             
561             string otuLabel, tax;
562             int size;
563             vector<string> otuLabels;
564             vector<string> taxs;
565             while (!in.eof()) {
566                 
567                 if (m->control_pressed) { in.close(); return metadata; }
568                 
569                 in >> otuLabel >> size >> tax; m->gobble(in);
570                 
571                 otuLabels.push_back(otuLabel);
572                 taxs.push_back(tax);
573             }
574             in.close();
575             
576             //should the labels be Otu001 or PhyloType001
577             string firstBin = m->currentSharedBinLabels[0];
578             string binTag = "Otu";
579             if ((firstBin.find("Otu")) == string::npos) { binTag = "PhyloType";  }
580             
581             //convert list file bin labels to shared file bin labels
582             //parse tax strings
583             //save in map
584             map<string, string> labelTaxMap;
585             string snumBins = toString(otuLabels.size());
586             for (int i = 0; i < otuLabels.size(); i++) {  
587                 
588                 if (m->control_pressed) { return metadata; }
589                 
590                 //if there is a bin label use it otherwise make one
591                 if (m->isContainingOnlyDigits(otuLabels[i])) {
592                     string binLabel = binTag;
593                     string sbinNumber = otuLabels[i];
594                     if (sbinNumber.length() < snumBins.length()) { 
595                         int diff = snumBins.length() - sbinNumber.length();
596                         for (int h = 0; h < diff; h++) { binLabel += "0"; }
597                     }
598                     binLabel += sbinNumber;
599                     binLabel = m->getSimpleLabel(binLabel);
600                     labelTaxMap[binLabel] = taxs[i];
601                 }else {  labelTaxMap[m->getSimpleLabel(otuLabels[i])] = taxs[i]; }
602             }
603             
604             
605             //{"taxonomy":["k__Bacteria", "p__Proteobacteria", "c__Gammaproteobacteria", "o__Enterobacteriales", "f__Enterobacteriaceae", "g__Escherichia", "s__"]}
606             
607             //traverse the binLabels forming the metadata strings and saving them
608             //make sure to sanity check
609             map<string, string>::iterator it;
610             for (int i = 0; i < lookup[0]->getNumBins(); i++) {
611                 
612                 if (m->control_pressed) { return metadata; }
613                 
614                 it = labelTaxMap.find(m->getSimpleLabel(m->currentSharedBinLabels[i]));
615                 
616                 if (it == labelTaxMap.end()) { m->mothurOut("[ERROR]: can't find taxonomy information for " + m->currentSharedBinLabels[i] + ".\n"); m->control_pressed = true; }
617                 else {
618                     if (picrust) {
619                         string temp = it->second; m->removeConfidences(temp);
620                         picrustLabels.push_back(temp);
621                     }
622                     vector<string> bootstrapValues;
623                     string data = "{\"taxonomy\":[";
624             
625                     vector<string> scores;
626                     vector<string> taxonomies = parseTax(it->second, scores);
627                     
628                     for (int j = 0; j < taxonomies.size()-1; j ++) { data += "\"" + taxonomies[j] + "\", "; }
629                     data += "\"" + taxonomies[taxonomies.size()-1] + "\"]";
630                     
631                     //add bootstrap values if available
632                     if (scores[0] != "null") {
633                         data += ", \"bootstrap\":[";
634                         
635                         for (int j = 0; j < scores.size()-1; j ++) { data += scores[j] + ", "; }
636                         data += scores[scores.size()-1] + "]";
637
638                     }
639                     data += "}";
640                     
641                     metadata.push_back(data);
642                 }
643             }
644         }
645         
646         return metadata;
647         
648     }
649         catch(exception& e) {
650                 m->errorOut(e, "MakeBiomCommand", "getMetadata");
651                 exit(1);
652         }
653
654 }
655 //**********************************************************************************************************************
656 int MakeBiomCommand::getSampleMetaData(vector<SharedRAbundVector*>& lookup){
657         try {
658         sampleMetadata.clear();
659         if (metadatafile == "") {  for (int i = 0; i < lookup.size(); i++) {  sampleMetadata.push_back("null");  } }
660         else {
661             ifstream in;
662             m->openInputFile(metadatafile, in);
663             
664             vector<string> groupNames, metadataLabels;
665             map<string, vector<string> > lines;
666             
667             string headerLine = m->getline(in); m->gobble(in);
668             vector<string> pieces = m->splitWhiteSpace(headerLine);
669             
670             //save names of columns you are reading
671             for (int i = 1; i < pieces.size(); i++) {
672                 metadataLabels.push_back(pieces[i]);
673             }
674             int count = metadataLabels.size();
675                         
676             vector<string> groups = m->getGroups();
677             
678             //read rest of file
679             while (!in.eof()) {
680                 
681                 if (m->control_pressed) { in.close(); return 0; }
682                 
683                 string group = "";
684                 in >> group; m->gobble(in);
685                 groupNames.push_back(group);
686                 
687                 string line = m->getline(in); m->gobble(in);
688                 vector<string> thisPieces = m->splitWhiteSpaceWithQuotes(line);
689         
690                 if (thisPieces.size() != count) { m->mothurOut("[ERROR]: expected " + toString(count) + " items of data for sample " + group + " read " + toString(thisPieces.size()) + ", quitting.\n"); }
691                 else {  if (m->inUsersGroups(group, groups)) { lines[group] = thisPieces; } }
692                 
693                 m->gobble(in);
694             }
695             in.close();
696             
697             map<string, vector<string> >::iterator it;
698             for (int i = 0; i < lookup.size(); i++) {
699                 
700                 if (m->control_pressed) { return 0; }
701                 
702                 it = lines.find(lookup[i]->getGroup());
703                 
704                 if (it == lines.end()) { m->mothurOut("[ERROR]: can't find metadata information for " + lookup[i]->getGroup() + ", quitting.\n"); m->control_pressed = true; }
705                 else {
706                     vector<string> values = it->second;
707                     
708                     string data = "{";
709                     for (int j = 0; j < metadataLabels.size()-1; j++) { 
710                         values[j] = m->removeQuotes(values[j]); 
711                         data += "\"" + metadataLabels[j] + "\":\"" + values[j] + "\", "; 
712                     }
713                     values[metadataLabels.size()-1] = m->removeQuotes(values[metadataLabels.size()-1]);
714                     data += "\"" + metadataLabels[metadataLabels.size()-1] + "\":\"" + values[metadataLabels.size()-1] + "\"}";
715                     sampleMetadata.push_back(data);
716                 }
717             }
718         }
719         
720         return 0;
721         
722     }
723         catch(exception& e) {
724                 m->errorOut(e, "MakeBiomCommand", "getSampleMetaData");
725                 exit(1);
726         }
727     
728 }
729
730 /**************************************************************************************************/
731 //returns {Bacteria, Bacteroidetes, ..} and scores is filled with {100, 98, ...} or {null, null, null}
732 vector<string> MakeBiomCommand::parseTax(string tax, vector<string>& scores) {
733         try {
734                 
735                 string taxon;
736         vector<string> taxs;
737                 
738                 while (tax.find_first_of(';') != -1) {
739                         
740                         if (m->control_pressed) { return taxs; }
741                         
742                         //get taxon
743                         taxon = tax.substr(0,tax.find_first_of(';'));
744             
745                         int pos = taxon.find_last_of('(');
746                         if (pos != -1) {
747                                 //is it a number?
748                                 int pos2 = taxon.find_last_of(')');
749                                 if (pos2 != -1) {
750                                         string confidenceScore = taxon.substr(pos+1, (pos2-(pos+1)));
751                                         if (m->isNumeric1(confidenceScore)) {
752                                                 taxon = taxon.substr(0, pos); //rip off confidence 
753                         scores.push_back(confidenceScore);
754                                         }else{ scores.push_back("null"); }
755                                 }
756                         }else{ scores.push_back("null"); }
757                         
758             //strip "" if they are there
759             pos = taxon.find("\"");
760             if (pos != string::npos) {
761                 string newTax = "";
762                 for (int k = 0; k < taxon.length(); k++) {
763                     if (taxon[k] != '\"') { newTax += taxon[k]; }
764                 }
765                 taxon = newTax;
766             }
767             
768             //look for bootstrap value
769                         taxs.push_back(taxon);
770             tax = tax.substr(tax.find_first_of(';')+1, tax.length());
771                 }
772                 
773                 return taxs;
774         }
775         catch(exception& e) {
776                 m->errorOut(e, "MakeBiomCommand", "parseTax");
777                 exit(1);
778         }
779 }
780
781 //**********************************************************************************************************************
782
783
784