5 // Created by SarahsWork on 6/12/13.
6 // Copyright (c) 2013 Schloss Lab. All rights reserved.
9 #include "lefsecommand.h"
10 #include "linearalgebra.h"
12 //**********************************************************************************************************************
13 vector<string> LefseCommand::setParameters(){
15 CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pdesign);
16 CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","summary",false,true,true); parameters.push_back(pshared);
17 CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass);
18 CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass);
19 CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
20 CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses);
21 CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha);
22 CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha);
24 CommandParameter plda("lda", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(plda);
25 CommandParameter pwilc("wilc", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pwilc);
26 CommandParameter pnormmillion("norm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pnormmillion);
27 CommandParameter piters("iters", "Number", "", "30", "", "", "","",false,false); parameters.push_back(piters);
28 //CommandParameter pwilcsamename("wilcsamename", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pwilcsamename);
29 CommandParameter pcurv("curv", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcurv);
30 CommandParameter pfiters("fboots", "Number", "", "0.67", "", "", "","",false,false); parameters.push_back(pfiters);
31 CommandParameter pstrict("strict", "Multiple", "0-1-2", "0", "", "", "","",false,false); parameters.push_back(pstrict);
32 CommandParameter pminc("minc", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminc);
33 CommandParameter pmulticlass_strat("multiclass", "Multiple", "onevone-onevall", "onevall", "", "", "","",false,false); parameters.push_back(pmulticlass_strat);
34 //CommandParameter psubject("subject", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psubject);
37 //not used in their current code, but in parameters
38 //CommandParameter pnlogs("nlogs", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnlogs);
39 //CommandParameter pranktec("ranktec", "Multiple", "lda-svm", "lda", "", "", "","",false,false); parameters.push_back(pranktec); // svm not implemented in their source yet.
40 //CommandParameter psvmnorm("svmnorm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(psvmnorm); //not used because svm not implemented yet.
43 //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
44 CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
45 CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
47 vector<string> myArray;
48 for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
52 m->errorOut(e, "LefseCommand", "setParameters");
56 //**********************************************************************************************************************
57 string LefseCommand::getHelpString(){
59 string helpString = "";
60 helpString += "The lefse command allows you to ....\n";
61 helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n";
62 helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n";
63 helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n";
64 helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n";
65 helpString += "The walpha parameter is used to set the alpha value for the Wilcoxon test. Default=0.05. \n";
66 helpString += "The lda parameter is used to set the threshold on the absolute value of the logarithmic LDA score. Default=2.0. \n";
67 helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n";
68 helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n";
69 //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n";
70 helpString += "The curv parameter is used to set whether perform the wilcoxon test ing the Curtis's approach [BETA VERSION] Default=F. \n";
71 helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n";
72 helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n";
73 helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n";
74 helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n";
75 helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n";
76 helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label<value1|value2|value3>-label<value1|value2>. For example to include groups from treatment with value early or late and age= young or old. class=treatment<Early|Late>-age<young|old>.\n";
77 helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n";
78 helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n";
82 m->errorOut(e, "LefseCommand", "getHelpString");
86 //**********************************************************************************************************************
87 string LefseCommand::getOutputPattern(string type) {
91 if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; }
92 else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
97 m->errorOut(e, "LefseCommand", "getOutputPattern");
101 //**********************************************************************************************************************
102 LefseCommand::LefseCommand(){
104 abort = true; calledHelp = true;
106 vector<string> tempOutNames;
107 outputTypes["summary"] = tempOutNames;
109 catch(exception& e) {
110 m->errorOut(e, "LefseCommand", "LefseCommand");
114 //**********************************************************************************************************************
115 LefseCommand::LefseCommand(string option) {
117 abort = false; calledHelp = false;
120 //allow user to run help
121 if(option == "help") { help(); abort = true; calledHelp = true; }
122 else if(option == "citation") { citation(); abort = true; calledHelp = true;}
125 //valid paramters for this command
126 vector<string> myArray = setParameters();
128 OptionParser parser(option);
129 map<string,string> parameters = parser.getParameters();
131 ValidParameters validParameter;
132 map<string,string>::iterator it;
133 //check to make sure all parameters are valid for command
134 for (it = parameters.begin(); it != parameters.end(); it++) {
135 if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
138 vector<string> tempOutNames;
139 outputTypes["summary"] = tempOutNames;
141 //if the user changes the input directory command factory will send this info to us in the output parameter
142 string inputDir = validParameter.validFile(parameters, "inputdir", false);
143 if (inputDir == "not found"){ inputDir = ""; }
147 it = parameters.find("design");
148 //user has given a template file
149 if(it != parameters.end()){
150 path = m->hasPath(it->second);
151 //if the user has not given a path then, add inputdir. else leave path alone.
152 if (path == "") { parameters["desing"] = inputDir + it->second; }
155 it = parameters.find("shared");
156 //user has given a template file
157 if(it != parameters.end()){
158 path = m->hasPath(it->second);
159 //if the user has not given a path then, add inputdir. else leave path alone.
160 if (path == "") { parameters["shared"] = inputDir + it->second; }
164 //get shared file, it is required
165 sharedfile = validParameter.validFile(parameters, "shared", true);
166 if (sharedfile == "not open") { sharedfile = ""; abort = true; }
167 else if (sharedfile == "not found") {
168 //if there is a current shared file, use it
169 sharedfile = m->getSharedFile();
170 if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
171 else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
172 }else { m->setSharedFile(sharedfile); }
174 //get shared file, it is required
175 designfile = validParameter.validFile(parameters, "design", true);
176 if (designfile == "not open") { designfile = ""; abort = true; }
177 else if (designfile == "not found") {
178 //if there is a current shared file, use it
179 designfile = m->getDesignFile();
180 if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); }
181 else { m->mothurOut("You have no current design file and the design parameter is required."); m->mothurOutEndLine(); abort = true; }
182 }else { m->setDesignFile(designfile); }
184 //if the user changes the output directory command factory will send this info to us in the output parameter
185 outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
186 outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
189 string label = validParameter.validFile(parameters, "label", false);
190 if (label == "not found") { label = ""; }
192 if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
193 else { allLines = 1; }
196 mclass = validParameter.validFile(parameters, "class", false);
197 if (mclass == "not found") { mclass = ""; }
199 subclass = validParameter.validFile(parameters, "subclass", false);
200 if (subclass == "not found") { subclass = mclass; }
202 classes = validParameter.validFile(parameters, "classes", false);
203 if (classes == "not found") { classes = ""; }
205 string temp = validParameter.validFile(parameters, "aalpha", false);
206 if (temp == "not found") { temp = "0.05"; }
207 m->mothurConvert(temp, anovaAlpha);
209 temp = validParameter.validFile(parameters, "walpha", false);
210 if (temp == "not found") { temp = "0.05"; }
211 m->mothurConvert(temp, wilcoxonAlpha);
213 temp = validParameter.validFile(parameters, "wilc", false);
214 if (temp == "not found") { temp = "T"; }
215 wilc = m->isTrue(temp);
217 temp = validParameter.validFile(parameters, "norm", false);
218 if (temp == "not found") { temp = "T"; }
219 normMillion = m->isTrue(temp);
221 //temp = validParameter.validFile(parameters, "subject", false);
222 //if (temp == "not found") { temp = "F"; }
223 //subject = m->isTrue(temp);
225 temp = validParameter.validFile(parameters, "lda", false);
226 if (temp == "not found") { temp = "2.0"; }
227 m->mothurConvert(temp, ldaThreshold);
229 temp = validParameter.validFile(parameters, "iters", false);
230 if (temp == "not found") { temp = "30"; }
231 m->mothurConvert(temp, iters);
233 temp = validParameter.validFile(parameters, "fboots", false);
234 if (temp == "not found") { temp = "0.67"; }
235 m->mothurConvert(temp, fBoots);
237 //temp = validParameter.validFile(parameters, "wilcsamename", false);
238 //if (temp == "not found") { temp = "F"; }
239 //wilcsamename = m->isTrue(temp);
241 temp = validParameter.validFile(parameters, "curv", false);
242 if (temp == "not found") { temp = "F"; }
243 curv = m->isTrue(temp);
245 temp = validParameter.validFile(parameters, "strict", false);
246 if (temp == "not found"){ temp = "0"; }
247 if ((temp != "0") && (temp != "1") && (temp != "2")) { m->mothurOut("Invalid strict option: choices are 0, 1 or 2."); m->mothurOutEndLine(); abort=true; }
248 else { m->mothurConvert(temp, strict); }
250 temp = validParameter.validFile(parameters, "minc", false);
251 if (temp == "not found") { temp = "10"; }
252 m->mothurConvert(temp, minC);
254 multiClassStrat = validParameter.validFile(parameters, "multiclass", false);
255 if (multiClassStrat == "not found"){ multiClassStrat = "onevall"; }
256 if ((multiClassStrat != "onevall") && (multiClassStrat != "onevone")) { m->mothurOut("Invalid multiclass option: choices are onevone or onevall."); m->mothurOutEndLine(); abort=true; }
260 catch(exception& e) {
261 m->errorOut(e, "LefseCommand", "LefseCommand");
265 //**********************************************************************************************************************
267 int LefseCommand::execute(){
269 //for reading lefse formatted file and running in mothur for testing - pass number of rows used for design file
270 if (false) { makeShared(1); exit(1); }
272 if (abort == true) { if (calledHelp) { return 0; } return 2; }
274 DesignMap designMap(designfile);
275 //if user set classes set groups=those classes
277 map<string, vector<string> > thisClasses = m->parseClasses(classes);
278 vector<string> groups = designMap.getNamesUnique(thisClasses);
279 if (groups.size() != 0) { m->setGroups(groups); }
280 else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; }
283 //if user did not select class use first column
284 if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); }
286 InputData input(sharedfile, "sharedfile");
287 vector<SharedRAbundFloatVector*> lookup = input.getSharedRAbundFloatVectors();
288 string lastLabel = lookup[0]->getLabel();
290 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
291 set<string> processedLabels;
292 set<string> userLabels = labels;
295 //as long as you are not at the end of the file or done wih the lines you want
296 while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
298 if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; }
300 if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
302 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
304 process(lookup, designMap);
306 processedLabels.insert(lookup[0]->getLabel());
307 userLabels.erase(lookup[0]->getLabel());
310 if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
311 string saveLabel = lookup[0]->getLabel();
313 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
314 lookup = input.getSharedRAbundFloatVectors(lastLabel);
315 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
317 process(lookup, designMap);
319 processedLabels.insert(lookup[0]->getLabel());
320 userLabels.erase(lookup[0]->getLabel());
322 //restore real lastlabel to save below
323 lookup[0]->setLabel(saveLabel);
326 lastLabel = lookup[0]->getLabel();
327 //prevent memory leak
328 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
330 if (m->control_pressed) { return 0; }
332 //get next line to process
333 lookup = input.getSharedRAbundFloatVectors();
336 if (m->control_pressed) { return 0; }
338 //output error messages about any remaining user labels
339 set<string>::iterator it;
340 bool needToRun = false;
341 for (it = userLabels.begin(); it != userLabels.end(); it++) {
342 m->mothurOut("Your file does not include the label " + *it);
343 if (processedLabels.count(lastLabel) != 1) {
344 m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
347 m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
351 //run last label if you need to
352 if (needToRun == true) {
353 for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
354 lookup = input.getSharedRAbundFloatVectors(lastLabel);
356 m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
357 process(lookup, designMap);
359 for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
363 //output files created by command
364 m->mothurOutEndLine();
365 m->mothurOut("Output File Names: "); m->mothurOutEndLine();
366 for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
367 m->mothurOutEndLine();
371 catch(exception& e) {
372 m->errorOut(e, "LefseCommand", "execute");
376 //**********************************************************************************************************************
378 int LefseCommand::process(vector<SharedRAbundFloatVector*>& lookup, DesignMap& designMap) {
380 vector<string> classes;
381 vector<string> subclasses;
382 map<string, string> subclass2Class;
383 map<string, set<string> > class2SubClasses; //maps class name to vector of its subclasses
384 map<string, vector<int> > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
385 map<string, vector<int> > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below.
386 for (int j = 0; j < lookup.size(); j++) {
387 if (normMillion) { normalize(lookup); }
388 string group = lookup[j]->getGroup();
389 string treatment = designMap.get(group, mclass); //get value for this group in this category
390 string thisSub = designMap.get(group, subclass);
391 map<string, string>::iterator it = subclass2Class.find(thisSub);
392 if (it == subclass2Class.end()) {
393 subclass2Class[thisSub] = treatment;
394 vector<int> temp; temp.push_back(j);
395 subClass2GroupIndex[thisSub] = temp;
398 if (it->second != treatment) {
399 //m->mothurOut("[WARNING]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class for Wilcoxon. Changing " + thisSub + " to " + treatment + "_" + thisSub + ".\n");
400 thisSub = treatment + "_" + thisSub;
401 subclass2Class[thisSub] = treatment;
402 vector<int> temp; temp.push_back(j);
403 subClass2GroupIndex[thisSub] = temp;
404 }else { subClass2GroupIndex[thisSub].push_back(j); }
407 map<string, set<string> >::iterator itClass = class2SubClasses.find(treatment);
408 if (itClass == class2SubClasses.end()) {
409 set<string> temp; temp.insert(thisSub);
410 class2SubClasses[treatment] = temp;
411 vector<int> temp2; temp2.push_back(j);
412 class2GroupIndex[treatment] = temp2;
413 classes.push_back(treatment);
415 class2SubClasses[treatment].insert(thisSub);
416 class2GroupIndex[treatment].push_back(j);
419 //sort classes so order is right
420 sort(classes.begin(), classes.end());
422 vector< vector<double> > means = getMeans(lookup, class2GroupIndex); //[numOTUs][classes] - classes in same order as class2GroupIndex
424 //run kruskal wallis on each otu
425 map<int, double> significantOtuLabels = runKruskalWallis(lookup, designMap);
427 int numSigBeforeWilcox = significantOtuLabels.size();
430 string wilcoxString = "";
431 if ((subclass != "") && wilc) { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class); wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; }
433 int numSigAfterWilcox = significantOtuLabels.size();
435 m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n");
437 map<int, double> sigOTUSLDA;
438 if (numSigAfterWilcox > 0) {
439 sigOTUSLDA = testLDA(lookup, significantOtuLabels, class2GroupIndex, subClass2GroupIndex);
440 m->mothurOut("Number of discriminative features with abs LDA score > " + toString(ldaThreshold) + " : " + toString(significantOtuLabels.size()) + ".\n");
442 else { m->mothurOut("No features with significant differences between the classes.\n"); }
444 printResults(means, significantOtuLabels, sigOTUSLDA, lookup[0]->getLabel(), classes);
448 catch(exception& e) {
449 m->errorOut(e, "LefseCommand", "process");
453 //**********************************************************************************************************************
454 int LefseCommand::normalize(vector<SharedRAbundFloatVector*>& lookup) {
457 for (int i = 0; i < lookup.size(); i++) {
459 for (int j = 0; j < lookup[i]->getNumBins(); j++) { sum += lookup[i]->getAbundance(j); }
460 mul.push_back(1000000.0/sum);
463 for (int i = 0; i < lookup.size(); i++) {
464 for (int j = 0; j < lookup[i]->getNumBins(); j++) { lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); }
469 catch(exception& e) {
470 m->errorOut(e, "LefseCommand", "normalize");
474 //**********************************************************************************************************************
475 map<int, double> LefseCommand::runKruskalWallis(vector<SharedRAbundFloatVector*>& lookup, DesignMap& designMap) {
477 map<int, double> significantOtuLabels;
478 int numBins = lookup[0]->getNumBins();
479 //sanity check to make sure each treatment has a group in the shared file
480 set<string> treatments;
481 for (int j = 0; j < lookup.size(); j++) {
482 string group = lookup[j]->getGroup();
483 string treatment = designMap.get(group, mclass); //get value for this group in this category
484 treatments.insert(treatment);
486 if (treatments.size() < 2) { m->mothurOut("[ERROR]: need at least 2 things to classes to compare, quitting.\n"); m->control_pressed = true; }
488 LinearAlgebra linear;
489 for (int i = 0; i < numBins; i++) {
490 if (m->control_pressed) { break; }
492 vector<spearmanRank> values;
493 for (int j = 0; j < lookup.size(); j++) {
494 string group = lookup[j]->getGroup();
495 string treatment = designMap.get(group, mclass); //get value for this group in this category
496 spearmanRank temp(treatment, lookup[j]->getAbundance(i));
497 values.push_back(temp);
501 double H = linear.calcKruskalWallis(values, pValue);
503 if (pValue < anovaAlpha) { significantOtuLabels[i] = pValue; }
506 return significantOtuLabels;
508 catch(exception& e) {
509 m->errorOut(e, "LefseCommand", "runKruskalWallis");
513 //**********************************************************************************************************************
514 //assumes not neccessarily paired
515 map<int, double> LefseCommand::runWilcoxon(vector<SharedRAbundFloatVector*>& lookup, DesignMap& designMap, map<int, double> bins, map<string, set<string> >& class2SubClasses, map<string, vector<int> >& subClass2GroupIndex, map<string, string> subclass2Class) {
517 map<int, double> significantOtuLabels;
518 map<int, double>::iterator it;
519 //if it exists and meets the following requirements run Wilcoxon
521 1. Subclass members all belong to same main class
525 int numBins = lookup[0]->getNumBins();
526 for (int i = 0; i < numBins; i++) {
527 if (m->control_pressed) { break; }
530 if (it != bins.end()) { //flagged in Kruskal Wallis
532 vector<float> abunds; for (int j = 0; j < lookup.size(); j++) { abunds.push_back(lookup[j]->getAbundance(i)); }
534 bool sig = testOTUWilcoxon(class2SubClasses, abunds, subClass2GroupIndex, subclass2Class);
535 if (sig) { significantOtuLabels[i] = it->second; }
537 }//bins flagged from kw
540 return significantOtuLabels;
542 catch(exception& e) {
543 m->errorOut(e, "LefseCommand", "runWilcoxon");
547 //**********************************************************************************************************************
548 //lefse.py - test_rep_wilcoxon_r function
549 bool LefseCommand::testOTUWilcoxon(map<string, set<string> >& class2SubClasses, vector<float> abunds, map<string, vector<int> >& subClass2GroupIndex, map<string, string> subclass2Class) {
552 double alphaMtc = wilcoxonAlpha;
553 vector< set<string> > allDiffs;
554 LinearAlgebra linear;
556 //for each subclass comparision
557 map<string, set<string> >::iterator itB;
558 for(map<string, set<string> >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){
560 for(itB;itB!=class2SubClasses.end();itB++){
561 if (m->control_pressed) { return false; }
563 int dirCmp = 0; // not set?? dir_cmp = "not_set" # 0=notset or none, 1=true, 2=false.
567 for (set<string>::iterator itClass1 = (it->second).begin(); itClass1 != (it->second).end(); itClass1++) {
569 for (set<string>::iterator itClass2 = (itB->second).begin(); itClass2 != (itB->second).end(); itClass2++) {
570 string subclass1 = *itClass1;
571 string subclass2 = *itClass2;
574 if (m->debug) { m->mothurOut( "[DEBUG comparing " + it->first + "-" + *itClass1 + " to " + itB->first + "-" + *itClass2 + "\n"); }
576 string treatment1 = subclass2Class[subclass1];
577 string treatment2 = subclass2Class[subclass2];
578 int numSubs1 = class2SubClasses[treatment1].size();
579 int numSubs2 = class2SubClasses[treatment2].size();
581 //if mul_cor != 0: alpha_mtc = th*l_subcl1*l_subcl2 if mul_cor == 2 else 1.0-math.pow(1.0-th,l_subcl1*l_subcl2)
582 if (strict != 0) { alphaMtc = wilcoxonAlpha * numSubs1 * numSubs2 ; }
583 if (strict == 2) {}else{ alphaMtc = 1.0-pow((1.0-wilcoxonAlpha),(double)(numSubs1 * numSubs2)); }
585 //fill x and y with this comparisons data
586 vector<double> x; vector<double> y;
589 vector<int> xIndexes = subClass2GroupIndex[subclass1]; //indexes in lookup for this subclass
590 vector<int> yIndexes = subClass2GroupIndex[subclass2]; //indexes in lookup for this subclass
591 for (int k = 0; k < yIndexes.size(); k++) { y.push_back(abunds[yIndexes[k]]); }
592 for (int k = 0; k < xIndexes.size(); k++) { x.push_back(abunds[xIndexes[k]]); }
595 //if len(cl1) < min_c or len(cl2) < min_c:
597 bool medComp = false; // are there enough samples per subclass
598 if ((xIndexes.size() < minC) || (yIndexes.size() < minC)) { medComp = true; }
600 double sx = m->median(x);
601 double sy = m->median(y);
603 //if cl1[0] == cl2[0] and len(set(cl1)) == 1 and len(set(cl2)) == 1:
604 //tres, first = False, False
607 bool tres = true; //don't think this is set in the python source. Not sure how that is handled, but setting it here.
608 if ((x[0] == y[0]) && (x.size() == 1) && (y.size() == 1)) { tres = false; first = false; }
610 H = linear.calcWilcoxon(x, y, pValue);
611 if (pValue < (alphaMtc*2.0)) { tres = true; }
612 else { tres = false; }
616 if not curv and ( med_comp or tres ):
618 if sx == sy: br = True
625 elif not curv and med_comp:
626 if ((sx < sy) != dir_cmp or sx == sy): br = True
628 if tres and dir_cmp == None:
631 if tres and dir_cmp != (sx < sy):
634 elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True
636 int sxSy = 2; //false
637 if (sx<sy) { sxSy = 1; } //true
641 if ((!curv) && (medComp || tres)) {
642 dirCmp = 2; if (sx<sy) { dirCmp = 1; } //dir_cmp = sx < sy
643 if (sx == sy) { br = true; }
646 if (medComp || tres) {
648 dirCmp = 2; if (sx<sy) { dirCmp = 1; } //dir_cmp = sx < sy
651 }else if (!curv && medComp) {
652 if (sxSy != dirCmp || sx == sy) { br = true; }
654 if (tres && dirCmp == 0) { curv_sign++; }
655 dirCmp = 2; if (sx<sy) { dirCmp = 1; } //dir_cmp = sx < sy
656 if (tres && dirCmp != sxSy) { //if tres and dir_cmp != (sx < sy):
660 }else if (!tres || sxSy != dirCmp || sx == sy) { br = true; } //elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True
663 }//for class2 subclasses
665 }//for class1 subclasses
667 if (curv) { diff = false; if (curv_sign > 0) { diff = true; } } //if curv: diff = curv_sign > 0
668 else { //else: diff = (ok == len(cl_hie[pair[1]])*len(cl_hie[pair[0]]))
670 if (ok == count) { diff = true; }
672 if (diff) { totalOk++; }
673 if (!diff && (multiClassStrat == "onevone")) { return false; }
674 if (diff && (multiClassStrat == "onevall")) { //all_diff.append(pair)
675 set<string> pair; pair.insert(it->first); pair.insert(itB->first);
676 allDiffs.push_back(pair);
681 if (multiClassStrat == "onevall") {
682 int tot_k = class2SubClasses.size();
683 for(map<string, set<string> >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){
684 if (m->control_pressed) { return false; }
686 //is this class okay in all comparisons
687 for (int h = 0; h < allDiffs.size(); h++) {
688 if (allDiffs[h].count(it->first) != 0) { nk++; }
690 if (nk == (tot_k-1)) { return true; }//if nk == tot_k-1: return True
697 catch(exception& e) {
698 m->errorOut(e, "LefseCommand", "testOTUWilcoxon");
702 //**********************************************************************************************************************
703 //modelled after lefse.py test_lda_r function
704 map<int, double> LefseCommand::testLDA(vector<SharedRAbundFloatVector*>& lookup, map<int, double> bins, map<string, vector<int> >& class2GroupIndex, map<string, vector<int> >& subClass2GroupIndex) {
706 map<int, double> sigOTUS;
707 map<int, double>::iterator it;
708 LinearAlgebra linear;
710 int numBins = lookup[0]->getNumBins();
711 vector< vector<double> > adjustedLookup;
713 for (int i = 0; i < numBins; i++) {
714 if (m->control_pressed) { break; }
717 if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it)
719 //fill x with this OTUs abundances
721 for (int j = 0; j < lookup.size(); j++) { x.push_back(lookup[j]->getAbundance(i)); }
724 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) {
725 //max(float(feats['class'].count(c))*0.5,4)
726 //max(numGroups in this class*0.5, 4.0)
727 double necessaryNum = ((double)((it->second).size())*0.5);
728 if (4.0 > necessaryNum) { necessaryNum = 4.0; }
731 for (int j = 0; j < (it->second).size(); j++) { uniques.insert(x[(it->second)[j]]); }
733 //if len(set([float(v[1]) for v in ff if v[0] == c])) > max(float(feats['class'].count(c))*0.5,4): continue
734 if ((double)(uniques.size()) > necessaryNum) { }
736 //feats[k][i] = math.fabs(feats[k][i] + lrand.normalvariate(0.0,max(feats[k][i]*0.05,0.01)))
737 for (int j = 0; j < (it->second).size(); j++) { //(it->second) contains indexes of abundance for this class
738 double sigma = max((x[(it->second)[j]]*0.05), 0.01);
739 x[(it->second)[j]] = abs(x[(it->second)[j]] + linear.normalvariate(0.0, sigma));
743 adjustedLookup.push_back(x);
749 map<int, string> indexToClass;
750 vector<string> classes;
751 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) {
752 //class with minimum number of groups
753 if ((it->second).size() < minCl) { minCl = (it->second).size(); }
754 for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; }
755 classes.push_back(it->first);
758 int numGroups = lookup.size(); //lfk
759 int fractionNumGroups = numGroups * fBoots; //rfk
760 minCl = (int)((float)(minCl*fBoots*fBoots*0.05));
761 minCl = max(minCl, 1);
763 vector< vector< vector<double> > > results;//[iters][numComparison][numOTUs]
764 for (int j = 0; j < iters; j++) {
765 if (m->control_pressed) { return sigOTUS; }
767 //find "good" random vector
769 for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1
771 for (int k = 0; k < fractionNumGroups; k++) { rand_s.push_back(m->getRandomIndex(numGroups-1)); }
772 if (!contastWithinClassesOrFewPerClass(adjustedLookup, rand_s, minCl, class2GroupIndex, indexToClass)) { h+=1000; } //break out of loop
774 if (m->control_pressed) { return sigOTUS; }
776 //print data in R input format for testing
778 vector<string> groups; for (int h = 0; h < rand_s.size(); h++) { groups.push_back(lookup[rand_s[h]]->getGroup()); }
779 printToCoutForRTesting(adjustedLookup, rand_s, class2GroupIndex, bins, subClass2GroupIndex, groups);
782 //for each pair of classes
783 vector< vector<double> > temp = lda(adjustedLookup, rand_s, indexToClass, classes); //[numComparison][numOTUs]
784 if (temp.size() != 0) { results.push_back(temp); }
787 if (m->control_pressed) { return sigOTUS; }
789 //m = max([numpy.mean([means[k][kk][p] for kk in range(boots)]) for p in range(len(pairs))])
791 for (it = bins.begin(); it != bins.end(); it++) { //[numOTUs] - need to go through bins so we can tie adjustedLookup back to the binNumber. adjustedLookup[0] ->bins entry[0].
792 vector<double> averageForEachComparison; averageForEachComparison.resize(results[0].size(), 0.0);
793 double maxM = 0.0; //max of averages for each comparison
794 for (int j = 0; j < results[0].size(); j++) { //numComparisons
795 for (int i = 0; i < results.size(); i++) { //iters
796 averageForEachComparison[j]+= results[i][j][k];
798 averageForEachComparison[j] /= (double) results.size();
799 if (averageForEachComparison[j] > maxM) { maxM = averageForEachComparison[j]; }
801 //res[k] = math.copysign(1.0,m)*math.log(1.0+math.fabs(m),10)
802 double multiple = 1.0; if (maxM < 0.0) { multiple = -1.0; }
803 double resK = multiple * log10(1.0+abs(maxM));
804 if (resK > ldaThreshold) { sigOTUS[it->first] = resK; }
810 catch(exception& e) {
811 m->errorOut(e, "LefseCommand", "testLDA");
815 //**********************************************************************************************************************
816 vector< vector<double> > LefseCommand::getMeans(vector<SharedRAbundFloatVector*>& lookup, map<string, vector<int> >& class2GroupIndex) {
818 int numBins = lookup[0]->getNumBins();
819 int numClasses = class2GroupIndex.size();
820 vector< vector<double> > means; //[numOTUS][classes]
821 means.resize(numBins);
822 for (int i = 0; i < means.size(); i++) { means[i].resize(numClasses, 0.0); }
824 map<int, string> indexToClass;
826 //shortcut for vectors below
827 map<string, int> quickIndex;
828 vector<int> classCounts;
829 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) {
830 for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; }
831 quickIndex[it->first] = count; count++;
832 classCounts.push_back((it->second).size());
835 for (int i = 0; i < numBins; i++) {
836 for (int j = 0; j < lookup.size(); j++) {
837 if (m->control_pressed) { return means; }
838 means[i][quickIndex[indexToClass[j]]] += lookup[j]->getAbundance(i);
842 for (int i = 0; i < numBins; i++) {
843 for (int j = 0; j < numClasses; j++) { means[i][j] /= (double) classCounts[j]; }
848 catch(exception& e) {
849 m->errorOut(e, "LefseCommand", "getMeans");
853 //**********************************************************************************************************************
854 vector< vector<double> > LefseCommand::lda(vector< vector<double> >& adjustedLookup, vector<int> rand_s, map<int, string>& indexToClass, vector<string> classes) {
856 //shortcut for vectors below
857 map<string, int> quickIndex;
858 for (int i = 0; i < classes.size(); i++) { quickIndex[classes[i]] = i; }
860 vector<string> randClass; //classes for rand sample
861 vector<int> counts; counts.resize(classes.size(), 0);
862 for (int i = 0; i < rand_s.size(); i++) {
863 string thisClass = indexToClass[rand_s[i]];
864 randClass.push_back(thisClass);
865 counts[quickIndex[thisClass]]++;
868 vector< vector<double> > a; //[numOTUs][numSampled]
869 for (int i = 0; i < adjustedLookup.size(); i++) {
871 for (int j = 0; j < rand_s.size(); j++) {
872 temp.push_back(adjustedLookup[i][rand_s[j]]);
877 LinearAlgebra linear;
878 vector< vector<double> > means; bool ignore;
879 vector< vector<double> > scaling = linear.lda(a, randClass, means, ignore); //means are returned sorted, quickIndex sorts as well since it uses a map. means[class][otu] =
880 if (ignore) { scaling.clear(); return scaling; }
881 if (m->control_pressed) { return scaling; }
883 vector< vector<double> > w; w.resize(a.size()); //w.unit <- w/sqrt(sum(w^2))
885 for (int i = 0; i < scaling.size(); i++) { w[i].push_back(scaling[i][0]); denom += (w[i][0]*w[i][0]); }
887 for (int i = 0; i < w.size(); i++) { w[i][0] /= denom; } //[numOTUs][1] - w.unit
889 //robjects.r('LD <- xy.matrix%*%w.unit') [numSampled][numOtus] * [numOTUs][1]
890 vector< vector<double> > LD = linear.matrix_mult(linear.transpose(a), w);
892 //find means for each groups LDs
893 vector<double> LDMeans; LDMeans.resize(classes.size(), 0.0); //means[0] -> average for [group0].
894 for (int i = 0; i < LD.size(); i++) { LDMeans[quickIndex[randClass[i]]] += LD[i][0]; }
895 for (int i = 0; i < LDMeans.size(); i++) { LDMeans[i] /= (double) counts[i]; }
897 //calculate for each comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
898 vector< vector<double> > results;// [numComparison][numOTUs]
899 for (int i = 0; i < LDMeans.size(); i++) {
900 for (int l = 0; l < i; l++) {
901 if (m->control_pressed) { return scaling; }
902 //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))')
903 double effectSize = abs(LDMeans[i] - LDMeans[l]);
904 //scal = robjects.r('wfinal <- w.unit * effect.size')
905 vector<double> compResults;
906 for (int j = 0; j < w.size(); j++) { //[numOTUs][1]
907 //coeff = [abs(float(v)) if not math.isnan(float(v)) else 0.0 for v in scal]
908 double coeff = abs(w[j][0]*effectSize); if (isnan(coeff) || isinf(coeff)) { coeff = 0.0; }
909 //gm = abs(res[p[0]][j] - res[p[1]][j]) - res is the means for each group for each otu
910 double gm = abs(means[i][j] - means[l][j]);
911 //means[k][i].append((gm+coeff[j])*0.5)
912 compResults.push_back((gm+coeff)*0.5);
914 results.push_back(compResults);
920 catch(exception& e) {
921 m->errorOut(e, "LefseCommand", "lda");
926 //**********************************************************************************************************************
927 //modelled after lefse.py contast_within_classes_or_few_per_class function
928 bool LefseCommand::contastWithinClassesOrFewPerClass(vector< vector<double> >& lookup, vector<int> rands, int minCl, map<string, vector<int> > class2GroupIndex, map<int, string> indexToClass) {
933 for (int i = 0; i < rands.size(); i++) { //fill cls with the classes represented in the random selection
934 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) {
935 if (m->inUsersGroups(rands[i], (it->second))) {
936 cls.insert(it->first);
943 if (rands.size() != countFound) { m->mothurOut("oops, should never get here, missing something.\n"); }
945 if (cls.size() < class2GroupIndex.size()) { return true; } //some classes are not present in sampling
947 for (set<string>::iterator it = cls.begin(); it != cls.end(); it++) {
948 if (cls.count(*it) < minCl) { return true; } //this sampling has class count below minimum
952 int numBins = lookup.size();
953 for (int i = 0; i < numBins; i++) {
954 if (m->control_pressed) { break; }
956 //break up random sampling by class
957 map<string, set<double> > class2Values; //maps class name -> set of abunds present in random sampling. F003Early -> 0.001, 0.003...
958 for (int j = 0; j < rands.size(); j++) {
959 class2Values[indexToClass[rands[j]]].insert(lookup[i][rands[j]]);
960 //rands[j] = index of randomly selected group in lookup, randIndex2Class[rands[j]] = class this group belongs to. lookup[rands[j]]->getAbundance(i) = abundance of this group for this OTU.
962 //are the unique values less than we want
963 //if (len(set(col)) <= min_cl and min_cl > 1) or (min_cl == 1 and len(set(col)) <= 1):
964 for (map<string, set<double> >::iterator it = class2Values.begin(); it != class2Values.end(); it++) {
965 if (((it->second).size() <= minCl && minCl > 1) || (minCl == 1 && (it->second).size() <= 1)) { return true; }
971 catch(exception& e) {
972 m->errorOut(e, "LefseCommand", "contastWithinClassesOrFewPerClass");
976 //**********************************************************************************************************************
977 int LefseCommand::printResults(vector< vector<double> > means, map<int, double> sigKW, map<int, double> sigLDA, string label, vector<string> classes) {
979 map<string, string> variables;
980 variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
981 variables["[distance]"] = label;
982 string outputFileName = getOutputFileName("summary",variables);
984 m->openOutputFile(outputFileName, out);
985 outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName);
988 out << "OTU\tLogMaxMean\tClass\tLDA\tpValue\n";
991 for (int i = 0; i < means.size(); i++) { //[numOTUs][classes]
992 //find max mean of classes
993 double maxMean = -1.0; string maxClass = "none";
994 for (int j = 0; j < means[i].size(); j++) { if (means[i][j] > maxMean) { maxMean = means[i][j]; maxClass = classes[j]; } }
996 //str(math.log(max(max(v),1.0),10.0))
997 double logMaxMean = 1.0;
998 if (maxMean > logMaxMean) { logMaxMean = maxMean; }
999 logMaxMean = log10(logMaxMean);
1001 out << m->currentBinLabels[i] << '\t' << logMaxMean << '\t';
1002 if (m->debug) { temp = m->currentBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; }
1004 map<int, double>::iterator it = sigLDA.find(i);
1005 if (it != sigLDA.end()) {
1006 out << maxClass << '\t' << it->second << '\t' << sigKW[i] << endl; //sigLDA is a subset of sigKW so no need to look
1007 if (m->debug) { temp += maxClass + '\t' + toString(it->second) + '\t' + toString(sigKW[i]) + '\n'; m->mothurOut(temp); temp = ""; }
1008 }else { out << '-' << endl; }
1015 catch(exception& e) {
1016 m->errorOut(e, "LefseCommand", "printResults");
1020 //**********************************************************************************************************************
1021 //printToCoutForRTesting(adjustedLookup, rand_s, class2GroupIndex, numBins);
1022 bool LefseCommand::printToCoutForRTesting(vector< vector<double> >& adjustedLookup, vector<int> rand_s, map<string, vector<int> >& class2GroupIndex, map<int, double> bins, map<string, vector<int> >& subClass2GroupIndex, vector<string> groups) {
1024 cout << "rand_s = ";
1025 for (int h = 0; h < rand_s.size(); h++) { cout << rand_s[h] << '\t'; } cout << endl;
1029 for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
1030 if (m->control_pressed) { break; }
1032 cout << m->currentBinLabels[it->first] << " <- c(";
1033 for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; }
1034 cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n";
1038 string tempOutput = "";
1039 for (int h = 0; h < rand_s.size(); h++) {
1040 //find class this index is in
1041 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it!= class2GroupIndex.end(); it++) {
1042 if (m->inUsersGroups(rand_s[h], (it->second)) ) { cout << (h+1) << " <- c(\"" +it->first + "\")\n" ; }
1047 string tempOutput = "treatments <- c(";
1048 for (int h = 0; h < rand_s.size(); h++) {
1049 //find class this index is in
1050 for (map<string, vector<int> >::iterator it = class2GroupIndex.begin(); it!= class2GroupIndex.end(); it++) {
1051 if (m->inUsersGroups(rand_s[h], (it->second)) ) { tempOutput += "\"" +it->first + "\"" + ","; } //"\"" +it->first + "\""
1054 tempOutput = tempOutput.substr(0, tempOutput.length()-1);
1055 tempOutput += ")\n";
1059 if (subclass != "") {
1060 string tempOutput = "sub <- c(";
1061 for (int h = 0; h < rand_s.size(); h++) {
1062 //find class this index is in
1063 for (map<string, vector<int> >::iterator it = subClass2GroupIndex.begin(); it!= subClass2GroupIndex.end(); it++) {
1064 if (m->inUsersGroups(rand_s[h], (it->second)) ) { tempOutput += "\"" +it->first + "\"" + ','; }
1067 tempOutput = tempOutput.substr(0, tempOutput.length()-1);
1068 tempOutput += ")\n";
1073 string tempOutput = "group <- c(";
1074 for (int h = 0; h < groups.size(); h++) {
1075 tempOutput += "\"" +groups[h] + "\"" + ',';
1077 tempOutput = tempOutput.substr(0, tempOutput.length()-1);
1078 tempOutput += ")\n";
1084 tempOutput = "dat <- data.frame(";
1085 for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
1086 if (m->control_pressed) { break; }
1088 tempOutput += "\"" + m->currentBinLabels[it->first] + "\"=" + m->currentBinLabels[it->first] + ",";
1090 //tempOutput = tempOutput.substr(0, tempOutput.length()-1);
1091 tempOutput += " class=treatments";
1092 //if (subclass != "") { tempOutput += ", subclass=sub"; }
1093 //if (subject) { tempOutput += ", subject=group"; }
1094 tempOutput += ")\n";
1097 tempOutput = "z <- suppressWarnings(mylda(as.formula(class ~ ";
1098 for (map<int, double>::iterator it = bins.begin(); it != bins.end(); it++) {
1099 if (m->control_pressed) { break; }
1101 tempOutput += m->currentBinLabels[it->first] + "+";
1103 tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign
1104 tempOutput += "), data = dat, tol = 1e-10))";
1105 cout << tempOutput + "\nz\n";
1106 cout << "w <- z$scaling[,1]\n"; //robjects.r('w <- z$scaling[,1]')
1107 cout << "w.unit <- w/sqrt(sum(w^2))\n"; //robjects.r('w.unit <- w/sqrt(sum(w^2))')
1108 cout << "ss <- dat[,-match(\"class\",colnames(dat))]\n"; //robjects.r('ss <- sub_d[,-match("class",colnames(sub_d))]')
1109 //if (subclass != "") { cout << "ss <- ss[,-match(\"subclass\",colnames(ss))]\n"; }//robjects.r('ss <- ss[,-match("subclass",colnames(ss))]')
1110 //if (subject) { cout << "ss <- ss[,-match(\"subject\",colnames(ss))]\n"; }//robjects.r('ss <- ss[,-match("subject",colnames(ss))]')
1111 cout << "xy.matrix <- as.matrix(ss)\n"; //robjects.r('xy.matrix <- as.matrix(ss)')
1112 cout << "LD <- xy.matrix%*%w.unit\n"; //robjects.r('LD <- xy.matrix%*%w.unit')
1113 cout << "effect.size <- abs(mean(LD[dat[,\"class\"]==\"'+p[0]+'\"]) - mean(LD[dat[,\"class\"]==\"'+p[1]+'\"]))\n"; //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))')
1114 cout << "wfinal <- w.unit * effect.size\n"; //scal = robjects.r('wfinal <- w.unit * effect.size')
1115 cout << "mm <- z$means\n"; //rres = robjects.r('mm <- z$means')
1119 catch(exception& e) {
1120 m->errorOut(e, "LefseCommand", "printToCoutForRTesting");
1124 //**********************************************************************************************************************
1125 int LefseCommand::makeShared(int numDesignLines) {
1128 m->openInputFile(sharedfile, in);
1129 vector< vector<string> > lines;
1130 for(int i = 0; i < numDesignLines; i++) {
1131 if (m->control_pressed) { return 0; }
1133 string line = m->getline(in);
1134 cout << line << endl;
1135 vector<string> pieces = m->splitWhiteSpace(line);
1136 lines.push_back(pieces);
1140 m->openOutputFile(sharedfile+".design", out); out << "group" << '\t';
1141 for (int j = 0; j < lines.size(); j++) { out << lines[j][0] << '\t'; } out << endl;
1142 for (int j = 1; j < lines[0].size(); j++) {
1143 out <<(j-1) << '\t';
1144 for (int i = 0; i < lines.size(); i++) {
1145 out << lines[i][j] << '\t';
1150 DesignMap design(sharedfile+".design");
1152 vector<SharedRAbundFloatVector*> lookup;
1153 for (int k = 0; k < lines[0].size()-1; k++) {
1154 SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
1155 temp->setLabel("0.03");
1156 temp->setGroup(toString(k));
1157 lookup.push_back(temp);
1160 m->currentBinLabels.clear();
1163 if (m->control_pressed) { return 0; }
1165 string line = m->getline(in);
1166 vector<string> pieces = m->splitWhiteSpace(line);
1169 for (int i = 1; i < pieces.size(); i++) {
1170 float value; m->mothurConvert(pieces[i], value);
1175 //cout << count << '\t';
1176 for (int i = 1; i < pieces.size(); i++) {
1177 float value; m->mothurConvert(pieces[i], value);
1178 lookup[i-1]->push_back(value, toString(i-1));
1179 //cout << pieces[i] << '\t';
1181 m->currentBinLabels.push_back(toString(count));
1182 //m->currentBinLabels.push_back(pieces[0]);
1183 //cout << line<< endl;
1190 for (int k = 0; k < lookup.size(); k++) {
1191 //cout << "0.03" << '\t' << toString(k) << endl; lookup[k]->print(cout);
1194 process(lookup, design);
1198 catch(exception& e) {
1199 m->errorOut(e, "LefseCommand", "printToCoutForRTesting");
1204 //**********************************************************************************************************************