X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=lefsecommand.cpp;h=0403c910705313432fa7320150326927d03c783b;hp=e894d5781346802cc5250f0fb956a9d853b0017e;hb=cf9987b67aa49777a4c91c2d21f96e58bf17aa82;hpb=03fae12080951ea1d889da5187386abc236cc334 diff --git a/lefsecommand.cpp b/lefsecommand.cpp index e894d57..0403c91 100644 --- a/lefsecommand.cpp +++ b/lefsecommand.cpp @@ -17,9 +17,29 @@ vector LefseCommand::setParameters(){ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); - CommandParameter palpha("anova_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); - CommandParameter pwalpha("wilcoxon_alpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); + //CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); + CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); + CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); + + CommandParameter plda("lda", "Number", "", "2.0", "", "", "","",false,false); parameters.push_back(plda); + CommandParameter pwilc("wilc", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pwilc); + CommandParameter pnormmillion("norm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pnormmillion); + CommandParameter piters("iters", "Number", "", "30", "", "", "","",false,false); parameters.push_back(piters); + //CommandParameter pwilcsamename("wilcsamename", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pwilcsamename); + CommandParameter pcurv("curv", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pcurv); + CommandParameter pfiters("fboots", "Number", "", "0.67", "", "", "","",false,false); parameters.push_back(pfiters); + CommandParameter pstrict("strict", "Multiple", "0-1-2", "0", "", "", "","",false,false); parameters.push_back(pstrict); + CommandParameter pminc("minc", "Number", "", "10", "", "", "","",false,false); parameters.push_back(pminc); + CommandParameter pmulticlass_strat("multiclass", "Multiple", "onevone-onevall", "onevall", "", "", "","",false,false); parameters.push_back(pmulticlass_strat); + //CommandParameter psubject("subject", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(psubject); + + + //not used in their current code, but in parameters + //CommandParameter pnlogs("nlogs", "Number", "", "3", "", "", "","",false,false); parameters.push_back(pnlogs); + //CommandParameter pranktec("ranktec", "Multiple", "lda-svm", "lda", "", "", "","",false,false); parameters.push_back(pranktec); // svm not implemented in their source yet. + //CommandParameter psvmnorm("svmnorm", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(psvmnorm); //not used because svm not implemented yet. + + //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files. CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); @@ -38,10 +58,22 @@ string LefseCommand::getHelpString(){ try { string helpString = ""; helpString += "The lefse command allows you to ....\n"; - helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, wilcoxon_alpha, anova_alpha.\n"; + helpString += "The lefse command parameters are: shared, design, class, subclass, label, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; - helpString += "The subclass parameter is used to indicate the .....If none is provided second category is used, or if only one category subclass is ignored. \n"; - helpString += "The classes parameter is used to indicate the classes you would like to use. Clases should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; + helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n"; + helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n"; + helpString += "The walpha parameter is used to set the alpha value for the Wilcoxon test. Default=0.05. \n"; + helpString += "The lda parameter is used to set the threshold on the absolute value of the logarithmic LDA score. Default=2.0. \n"; + helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n"; + helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n"; + //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n"; + helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n"; + helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n"; + helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n"; + helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n"; + helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n"; + helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n"; + //helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n"; return helpString; @@ -57,8 +89,6 @@ string LefseCommand::getOutputPattern(string type) { string pattern = ""; if (type == "summary") { pattern = "[filename],[distance],lefse_summary"; } - else if (type == "kruskall-wallis") { pattern = "[filename],[distance],kruskall_wallis"; } - else if (type == "wilcoxon") { pattern = "[filename],[distance],wilcoxon"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -75,8 +105,6 @@ LefseCommand::LefseCommand(){ setParameters(); vector tempOutNames; outputTypes["summary"] = tempOutNames; - outputTypes["kruskall-wallis"] = tempOutNames; - outputTypes["wilcoxon"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "LefseCommand", "LefseCommand"); @@ -109,8 +137,6 @@ LefseCommand::LefseCommand(string option) { vector tempOutNames; outputTypes["summary"] = tempOutNames; - outputTypes["kruskall-wallis"] = tempOutNames; - outputTypes["wilcoxon"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -171,19 +197,60 @@ LefseCommand::LefseCommand(string option) { if (mclass == "not found") { mclass = ""; } subclass = validParameter.validFile(parameters, "subclass", false); - if (subclass == "not found") { subclass = ""; } + if (subclass == "not found") { subclass = mclass; } - classes = validParameter.validFile(parameters, "classes", false); - if (classes == "not found") { classes = ""; } - - string temp = validParameter.validFile(parameters, "anova_alpha", false); + string temp = validParameter.validFile(parameters, "aalpha", false); if (temp == "not found") { temp = "0.05"; } m->mothurConvert(temp, anovaAlpha); - temp = validParameter.validFile(parameters, "wilcoxon_alpha", false); + temp = validParameter.validFile(parameters, "walpha", false); if (temp == "not found") { temp = "0.05"; } m->mothurConvert(temp, wilcoxonAlpha); + + temp = validParameter.validFile(parameters, "wilc", false); + if (temp == "not found") { temp = "T"; } + wilc = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "norm", false); + if (temp == "not found") { temp = "T"; } + normMillion = m->isTrue(temp); + + //temp = validParameter.validFile(parameters, "subject", false); + //if (temp == "not found") { temp = "F"; } + //subject = m->isTrue(temp); + temp = validParameter.validFile(parameters, "lda", false); + if (temp == "not found") { temp = "2.0"; } + m->mothurConvert(temp, ldaThreshold); + + temp = validParameter.validFile(parameters, "iters", false); + if (temp == "not found") { temp = "30"; } + m->mothurConvert(temp, iters); + + temp = validParameter.validFile(parameters, "fboots", false); + if (temp == "not found") { temp = "0.67"; } + m->mothurConvert(temp, fBoots); + + //temp = validParameter.validFile(parameters, "wilcsamename", false); + //if (temp == "not found") { temp = "F"; } + //wilcsamename = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "curv", false); + if (temp == "not found") { temp = "F"; } + curv = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "strict", false); + if (temp == "not found"){ temp = "0"; } + if ((temp != "0") && (temp != "1") && (temp != "2")) { m->mothurOut("Invalid strict option: choices are 0, 1 or 2."); m->mothurOutEndLine(); abort=true; } + else { m->mothurConvert(temp, strict); } + + temp = validParameter.validFile(parameters, "minc", false); + if (temp == "not found") { temp = "10"; } + m->mothurConvert(temp, minC); + + multiClassStrat = validParameter.validFile(parameters, "multiclass", false); + if (multiClassStrat == "not found"){ multiClassStrat = "onevall"; } + if ((multiClassStrat != "onevall") && (multiClassStrat != "onevone")) { m->mothurOut("Invalid multiclass option: choices are onevone or onevall."); m->mothurOutEndLine(); abort=true; } } } @@ -196,23 +263,19 @@ LefseCommand::LefseCommand(string option) { int LefseCommand::execute(){ try { + srand(1982); + //for reading lefse formatted file and running in mothur for testing - pass number of rows used for design file + if (false) { makeShared(1); exit(1); } if (abort == true) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); - //if user set classes set groups=those classes - if (classes != "") { - map > thisClasses = m->parseClasses(classes); - vector groups = designMap.getNamesUnique(thisClasses); - if (groups.size() != 0) { m->setGroups(groups); } - else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } - } //if user did not select class use first column - if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } + if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } } InputData input(sharedfile, "sharedfile"); - vector lookup = input.getSharedRAbundVectors(); + vector lookup = input.getSharedRAbundFloatVectors(); string lastLabel = lookup[0]->getLabel(); //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. @@ -239,7 +302,7 @@ int LefseCommand::execute(){ string saveLabel = lookup[0]->getLabel(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - lookup = input.getSharedRAbundVectors(lastLabel); + lookup = input.getSharedRAbundFloatVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup, designMap); @@ -258,7 +321,7 @@ int LefseCommand::execute(){ if (m->control_pressed) { return 0; } //get next line to process - lookup = input.getSharedRAbundVectors(); + lookup = input.getSharedRAbundFloatVectors(); } if (m->control_pressed) { return 0; } @@ -279,7 +342,7 @@ int LefseCommand::execute(){ //run last label if you need to if (needToRun == true) { for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } } - lookup = input.getSharedRAbundVectors(lastLabel); + lookup = input.getSharedRAbundFloatVectors(lastLabel); m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); process(lookup, designMap); @@ -293,6 +356,7 @@ int LefseCommand::execute(){ m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + srand(time(NULL)); return 0; } @@ -303,13 +367,79 @@ int LefseCommand::execute(){ } //********************************************************************************************************************** -int LefseCommand::process(vector& lookup, DesignMap& designMap) { +int LefseCommand::process(vector& lookup, DesignMap& designMap) { try { + vector classes; + vector subclasses; + map subclass2Class; + map > class2SubClasses; //maps class name to vector of its subclasses + map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. + map > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. + if (normMillion) { normalize(lookup); } + for (int j = 0; j < lookup.size(); j++) { + string group = lookup[j]->getGroup(); + string treatment = designMap.get(group, mclass); //get value for this group in this category + string thisSub = designMap.get(group, subclass); + map::iterator it = subclass2Class.find(thisSub); + if (it == subclass2Class.end()) { + subclass2Class[thisSub] = treatment; + vector temp; temp.push_back(j); + subClass2GroupIndex[thisSub] = temp; + } + else { + if (it->second != treatment) { + //m->mothurOut("[WARNING]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class for Wilcoxon. Changing " + thisSub + " to " + treatment + "_" + thisSub + ".\n"); + thisSub = treatment + "_" + thisSub; + subclass2Class[thisSub] = treatment; + vector temp; temp.push_back(j); + subClass2GroupIndex[thisSub] = temp; + }else { subClass2GroupIndex[thisSub].push_back(j); } + } + + map >::iterator itClass = class2SubClasses.find(treatment); + if (itClass == class2SubClasses.end()) { + set temp; temp.insert(thisSub); + class2SubClasses[treatment] = temp; + vector temp2; temp2.push_back(j); + class2GroupIndex[treatment] = temp2; + classes.push_back(treatment); + }else{ + class2SubClasses[treatment].insert(thisSub); + class2GroupIndex[treatment].push_back(j); + } + } + //sort classes so order is right + sort(classes.begin(), classes.end()); + + vector< vector > means = getMeans(lookup, class2GroupIndex); //[numOTUs][classes] - classes in same order as class2GroupIndex + //run kruskal wallis on each otu - vector significantOtuLabels = runKruskalWallis(lookup, designMap); + map significantOtuLabels = runKruskalWallis(lookup, designMap); + + int numSigBeforeWilcox = significantOtuLabels.size(); + + if (m->debug) { m->mothurOut("[DEBUG]: completed Kruskal Wallis\n"); } //check for subclass - if (subclass != "") { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels); } + string wilcoxString = ""; + if ((subclass != "") && wilc) { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class); wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; } + + int numSigAfterWilcox = significantOtuLabels.size(); + + if (m->debug) { m->mothurOut("[DEBUG]: completed Wilcoxon\n"); } + + m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n"); + + map sigOTUSLDA; + if (numSigAfterWilcox > 0) { + sigOTUSLDA = testLDA(lookup, significantOtuLabels, class2GroupIndex, subClass2GroupIndex); + m->mothurOut("Number of discriminative features with abs LDA score > " + toString(ldaThreshold) + " : " + toString(significantOtuLabels.size()) + ".\n"); + } + else { m->mothurOut("No features with significant differences between the classes.\n"); } + + if (m->debug) { m->mothurOut("[DEBUG]: completed lda\n"); } + + printResults(means, significantOtuLabels, sigOTUSLDA, lookup[0]->getLabel(), classes); return 0; } @@ -319,20 +449,32 @@ int LefseCommand::process(vector& lookup, DesignMap& design } } //********************************************************************************************************************** - -vector LefseCommand::runKruskalWallis(vector& lookup, DesignMap& designMap) { +int LefseCommand::normalize(vector& lookup) { try { - map variables; - variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); - variables["[distance]"] = lookup[0]->getLabel(); - string outputFileName = getOutputFileName("kruskall-wallis",variables); + vector mul; + for (int i = 0; i < lookup.size(); i++) { + double sum = 0.0; + for (int j = 0; j < lookup[i]->getNumBins(); j++) { sum += lookup[i]->getAbundance(j); } + mul.push_back(1000000.0/sum); + } - ofstream out; - m->openOutputFile(outputFileName, out); - outputNames.push_back(outputFileName); outputTypes["kruskall-wallis"].push_back(outputFileName); - out << "OTULabel\tKW\tPvalue\n"; + for (int i = 0; i < lookup.size(); i++) { + for (int j = 0; j < lookup[i]->getNumBins(); j++) { + lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); + } + } - vector significantOtuLabels; + return 0; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "normalize"); + exit(1); + } +} +//********************************************************************************************************************** +map LefseCommand::runKruskalWallis(vector& lookup, DesignMap& designMap) { + try { + map significantOtuLabels; int numBins = lookup[0]->getNumBins(); //sanity check to make sure each treatment has a group in the shared file set treatments; @@ -357,13 +499,9 @@ vector LefseCommand::runKruskalWallis(vector& lookup, double pValue = 0.0; double H = linear.calcKruskalWallis(values, pValue); - - //output H and signifigance - out << m->currentBinLabels[i] << '\t' << H << '\t' << pValue << endl; - - if (pValue < anovaAlpha) { significantOtuLabels.push_back(i); } + + if (pValue < anovaAlpha) { significantOtuLabels[i] = pValue; } } - out.close(); return significantOtuLabels; } @@ -373,113 +511,703 @@ vector LefseCommand::runKruskalWallis(vector& lookup, } } //********************************************************************************************************************** - -vector LefseCommand::runWilcoxon(vector& lookup, DesignMap& designMap, vector bins) { +//assumes not neccessarily paired +map LefseCommand::runWilcoxon(vector& lookup, DesignMap& designMap, map bins, map >& class2SubClasses, map >& subClass2GroupIndex, map subclass2Class) { try { - vector significantOtuLabels; + map significantOtuLabels; + map::iterator it; //if it exists and meets the following requirements run Wilcoxon /* 1. Subclass members all belong to same main class - 2. Number of groups in each subclass is the same - 3. anything else?? - - */ - vector subclasses; - map subclass2Class; - map subclassCounts; - map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. - bool error = false; - for (int j = 0; j < lookup.size(); j++) { - string group = lookup[j]->getGroup(); - string treatment = designMap.get(group, mclass); //get value for this group in this category - string thisSub = designMap.get(group, subclass); - map::iterator it = subclass2Class.find(thisSub); - if (it == subclass2Class.end()) { - subclass2Class[thisSub] = treatment; - subclassCounts[thisSub] = 1; - vector temp; temp.push_back(j); - subClass2GroupIndex[thisSub] = temp; + anything else + */ + + int numBins = lookup[0]->getNumBins(); + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + it = bins.find(i); + if (it != bins.end()) { //flagged in Kruskal Wallis + + vector abunds; for (int j = 0; j < lookup.size(); j++) { abunds.push_back(lookup[j]->getAbundance(i)); } + + bool sig = testOTUWilcoxon(class2SubClasses, abunds, subClass2GroupIndex, subclass2Class); + if (sig) { significantOtuLabels[i] = it->second; } + + }//bins flagged from kw + }//for bins + + return significantOtuLabels; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "runWilcoxon"); + exit(1); + } +} +//********************************************************************************************************************** +//lefse.py - test_rep_wilcoxon_r function +bool LefseCommand::testOTUWilcoxon(map >& class2SubClasses, vector abunds, map >& subClass2GroupIndex, map subclass2Class) { + try { + int totalOk = 0; + double alphaMtc = wilcoxonAlpha; + vector< set > allDiffs; + LinearAlgebra linear; + + //for each subclass comparision + map >::iterator itB; + for(map >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){ + itB = it;itB++; + for(itB;itB!=class2SubClasses.end();itB++){ + if (m->control_pressed) { return false; } + bool first = true; + int dirCmp = 0; // not set?? dir_cmp = "not_set" # 0=notset or none, 1=true, 2=false. + int curv_sign = 0; + int ok = 0; + int count = 0; + for (set::iterator itClass1 = (it->second).begin(); itClass1 != (it->second).end(); itClass1++) { + bool br = false; + for (set::iterator itClass2 = (itB->second).begin(); itClass2 != (itB->second).end(); itClass2++) { + string subclass1 = *itClass1; + string subclass2 = *itClass2; + count++; + + if (m->debug) { m->mothurOut( "[DEBUG comparing " + it->first + "-" + *itClass1 + " to " + itB->first + "-" + *itClass2 + "\n"); } + + string treatment1 = subclass2Class[subclass1]; + string treatment2 = subclass2Class[subclass2]; + int numSubs1 = class2SubClasses[treatment1].size(); + int numSubs2 = class2SubClasses[treatment2].size(); + + //if mul_cor != 0: alpha_mtc = th*l_subcl1*l_subcl2 if mul_cor == 2 else 1.0-math.pow(1.0-th,l_subcl1*l_subcl2) + if (strict != 0) { alphaMtc = wilcoxonAlpha * numSubs1 * numSubs2 ; } + if (strict == 2) {}else{ alphaMtc = 1.0-pow((1.0-wilcoxonAlpha),(double)(numSubs1 * numSubs2)); } + + //fill x and y with this comparisons data + vector x; vector y; + + //fill x and y + vector xIndexes = subClass2GroupIndex[subclass1]; //indexes in lookup for this subclass + vector yIndexes = subClass2GroupIndex[subclass2]; //indexes in lookup for this subclass + for (int k = 0; k < yIndexes.size(); k++) { y.push_back(abunds[yIndexes[k]]); } + for (int k = 0; k < xIndexes.size(); k++) { x.push_back(abunds[xIndexes[k]]); } + + // med_comp = False + //if len(cl1) < min_c or len(cl2) < min_c: + //med_comp = True + bool medComp = false; // are there enough samples per subclass + if ((xIndexes.size() < minC) || (yIndexes.size() < minC)) { medComp = true; } + + double sx = m->median(x); + double sy = m->median(y); + + //if cl1[0] == cl2[0] and len(set(cl1)) == 1 and len(set(cl2)) == 1: + //tres, first = False, False + double pValue = 0.0; + double H = 0.0; + bool tres = true; //don't think this is set in the python source. Not sure how that is handled, but setting it here. + if ((x[0] == y[0]) && (x.size() == 1) && (y.size() == 1)) { tres = false; first = false; } + else if (!medComp) { + H = linear.calcWilcoxon(x, y, pValue); + if (pValue < (alphaMtc*2.0)) { tres = true; } + else { tres = false; } + } + /*if first: + first = False + if not curv and ( med_comp or tres ): + dir_cmp = sx < sy + if sx == sy: br = True + elif curv: + dir_cmp = None + if med_comp or tres: + curv_sign += 1 + dir_cmp = sx < sy + else: br = True + elif not curv and med_comp: + if ((sx < sy) != dir_cmp or sx == sy): br = True + elif curv: + if tres and dir_cmp == None: + curv_sign += 1 + dir_cmp = sx < sy + if tres and dir_cmp != (sx < sy): + br = True + curv_sign = -1 + elif not tres or (sx < sy) != dir_cmp or sx == sy: br = True + */ + int sxSy = 2; //false + if (sx 0) { diff = true; } } //if curv: diff = curv_sign > 0 + else { //else: diff = (ok == len(cl_hie[pair[1]])*len(cl_hie[pair[0]])) + diff = false; + if (ok == count) { diff = true; } + } + if (diff) { totalOk++; } + if (!diff && (multiClassStrat == "onevone")) { return false; } + if (diff && (multiClassStrat == "onevall")) { //all_diff.append(pair) + set pair; pair.insert(it->first); pair.insert(itB->first); + allDiffs.push_back(pair); + } + }//classes + }//classes + + if (multiClassStrat == "onevall") { + int tot_k = class2SubClasses.size(); + for(map >::iterator it=class2SubClasses.begin();it!=class2SubClasses.end();it++){ + if (m->control_pressed) { return false; } + int nk = 0; + //is this class okay in all comparisons + for (int h = 0; h < allDiffs.size(); h++) { + if (allDiffs[h].count(it->first) != 0) { nk++; } + } + if (nk == (tot_k-1)) { return true; }//if nk == tot_k-1: return True } - else { - subclassCounts[thisSub]++; - subClass2GroupIndex[thisSub].push_back(j); - if (it->second != treatment) { - error = true; - m->mothurOut("[ERROR]: subclass " + thisSub + " has members in " + it->second + " and " + treatment + ". Subclass members must be from the same class. Ignoring wilcoxon.\n"); + return false; + } + + return true; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "testOTUWilcoxon"); + exit(1); + } +} +//********************************************************************************************************************** +//modelled after lefse.py test_lda_r function +map LefseCommand::testLDA(vector& lookup, map bins, map >& class2GroupIndex, map >& subClass2GroupIndex) { + try { + map sigOTUS; + map::iterator it; + LinearAlgebra linear; + + int numBins = lookup[0]->getNumBins(); + vector< vector > adjustedLookup; + + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + if (m->debug) { m->mothurOut("[DEBUG]: bin = " + toString(i) + "\n."); } + + it = bins.find(i); + if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it) + + if (m->debug) { m->mothurOut("[DEBUG]:flagged bin = " + toString(i) + "\n."); } + + //fill x with this OTUs abundances + vector x; + for (int j = 0; j < lookup.size(); j++) { x.push_back(lookup[j]->getAbundance(i)); } + + //go through classes + for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + + if (m->debug) { m->mothurOut("[DEBUG]: class = " + it->first + "\n."); } + + //max(float(feats['class'].count(c))*0.5,4) + //max(numGroups in this class*0.5, 4.0) + double necessaryNum = ((double)((it->second).size())*0.5); + if (4.0 > necessaryNum) { necessaryNum = 4.0; } + + set uniques; + for (int j = 0; j < (it->second).size(); j++) { uniques.insert(x[(it->second)[j]]); } + + //if len(set([float(v[1]) for v in ff if v[0] == c])) > max(float(feats['class'].count(c))*0.5,4): continue + if ((double)(uniques.size()) > necessaryNum) { } + else { + //feats[k][i] = math.fabs(feats[k][i] + lrand.normalvariate(0.0,max(feats[k][i]*0.05,0.01))) + for (int j = 0; j < (it->second).size(); j++) { //(it->second) contains indexes of abundance for this class + double sigma = max((x[(it->second)[j]]*0.05), 0.01); + x[(it->second)[j]] = abs(x[(it->second)[j]] + linear.normalvariate(0.0, sigma)); + } + } } + adjustedLookup.push_back(x); } } + + //go through classes + int minCl = 1e6; + map indexToClass; + vector classes; + for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + //class with minimum number of groups + if ((it->second).size() < minCl) { minCl = (it->second).size(); } + for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; } + classes.push_back(it->first); + } + + int numGroups = lookup.size(); //lfk + int fractionNumGroups = numGroups * fBoots; //rfk + minCl = (int)((float)(minCl*fBoots*fBoots*0.05)); + minCl = max(minCl, 1); - if (error) { return significantOtuLabels; } - else { //check counts to make sure subclasses are the same size - set counts; - for (map::iterator it = subclassCounts.begin(); it != subclassCounts.end(); it++) { counts.insert(it->second); } - if (counts.size() > 1) { m->mothurOut("[ERROR]: subclasses must be the same size. Ignoring wilcoxon.\n"); - return significantOtuLabels; } + if (m->debug) { m->mothurOut("[DEBUG]: about to start iters. \n."); } + + vector< vector< vector > > results;//[iters][numComparison][numOTUs] + for (int j = 0; j < iters; j++) { + if (m->control_pressed) { return sigOTUS; } + + if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(j) + "\n."); } + + //find "good" random vector + vector rand_s; + for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1 + rand_s.clear(); + for (int k = 0; k < fractionNumGroups; k++) { rand_s.push_back(m->getRandomIndex(numGroups-1)); } + if (!contastWithinClassesOrFewPerClass(adjustedLookup, rand_s, minCl, class2GroupIndex, indexToClass)) { h+=1000; } //break out of loop + } + if (m->control_pressed) { return sigOTUS; } + + //print data in R input format for testing + if (false) { + vector groups; for (int h = 0; h < rand_s.size(); h++) { groups.push_back(lookup[rand_s[h]]->getGroup()); } + printToCoutForRTesting(adjustedLookup, rand_s, class2GroupIndex, bins, subClass2GroupIndex, groups); + } + + //for each pair of classes + vector< vector > temp = lda(adjustedLookup, rand_s, indexToClass, classes); //[numComparison][numOTUs] + if (temp.size() != 0) { results.push_back(temp); } + } + + if (m->control_pressed) { return sigOTUS; } + + //m = max([numpy.mean([means[k][kk][p] for kk in range(boots)]) for p in range(len(pairs))]) + int k = 0; + for (it = bins.begin(); it != bins.end(); it++) { //[numOTUs] - need to go through bins so we can tie adjustedLookup back to the binNumber. adjustedLookup[0] ->bins entry[0]. + vector averageForEachComparison; averageForEachComparison.resize(results[0].size(), 0.0); + double maxM = 0.0; //max of averages for each comparison + for (int j = 0; j < results[0].size(); j++) { //numComparisons + for (int i = 0; i < results.size(); i++) { //iters + averageForEachComparison[j]+= results[i][j][k]; + } + averageForEachComparison[j] /= (double) results.size(); + if (averageForEachComparison[j] > maxM) { maxM = averageForEachComparison[j]; } + } + //res[k] = math.copysign(1.0,m)*math.log(1.0+math.fabs(m),10) + double multiple = 1.0; if (maxM < 0.0) { multiple = -1.0; } + double resK = multiple * log10(1.0+abs(maxM)); + if (resK > ldaThreshold) { sigOTUS[it->first] = resK; } + k++; } + return sigOTUS; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "testLDA"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > LefseCommand::getMeans(vector& lookup, map >& class2GroupIndex) { + try { int numBins = lookup[0]->getNumBins(); - vector comp; - //find comparisons and fill comp - map::iterator itB; - for(map::iterator it=subclassCounts.begin();it!=subclassCounts.end();it++){ - itB = it;itB++; - for(itB;itB!=subclassCounts.end();itB++){ - compGroup temp(it->first,itB->first); - comp.push_back(temp); - } + int numClasses = class2GroupIndex.size(); + vector< vector > means; //[numOTUS][classes] + means.resize(numBins); + for (int i = 0; i < means.size(); i++) { means[i].resize(numClasses, 0.0); } + + map indexToClass; + int count = 0; + //shortcut for vectors below + map quickIndex; + vector classCounts; + for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + for (int i = 0; i < (it->second).size(); i++) { indexToClass[(it->second)[i]] = it->first; } + quickIndex[it->first] = count; count++; + classCounts.push_back((it->second).size()); + } + + for (int i = 0; i < numBins; i++) { + for (int j = 0; j < lookup.size(); j++) { + if (m->control_pressed) { return means; } + means[i][quickIndex[indexToClass[j]]] += lookup[j]->getAbundance(i); + } + } + + for (int i = 0; i < numBins; i++) { + for (int j = 0; j < numClasses; j++) { means[i][j] /= (double) classCounts[j]; } + } + + return means; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "getMeans"); + exit(1); + } +} +//********************************************************************************************************************** +vector< vector > LefseCommand::lda(vector< vector >& adjustedLookup, vector rand_s, map& indexToClass, vector classes) { + try { + //shortcut for vectors below + map quickIndex; + for (int i = 0; i < classes.size(); i++) { quickIndex[classes[i]] = i; } + + vector randClass; //classes for rand sample + vector counts; counts.resize(classes.size(), 0); + for (int i = 0; i < rand_s.size(); i++) { + string thisClass = indexToClass[rand_s[i]]; + randClass.push_back(thisClass); + counts[quickIndex[thisClass]]++; } - int numComp = comp.size(); - if (numComp < 2) { m->mothurOut("[ERROR]: Need at least 2 subclasses, Ignoring Wilcoxon.\n"); - return significantOtuLabels; } + vector< vector > a; //[numOTUs][numSampled] + for (int i = 0; i < adjustedLookup.size(); i++) { + vector temp; + for (int j = 0; j < rand_s.size(); j++) { + temp.push_back(adjustedLookup[i][rand_s[j]]); + } + a.push_back(temp); + } + + LinearAlgebra linear; + vector< vector > means; bool ignore; + vector< vector > scaling = linear.lda(a, randClass, means, ignore); //means are returned sorted, quickIndex sorts as well since it uses a map. means[class][otu] = + if (ignore) { scaling.clear(); return scaling; } + if (m->control_pressed) { return scaling; } + + vector< vector > w; w.resize(a.size()); //w.unit <- w/sqrt(sum(w^2)) + double denom = 0.0; + for (int i = 0; i < scaling.size(); i++) { w[i].push_back(scaling[i][0]); denom += (w[i][0]*w[i][0]); } + denom = sqrt(denom); + for (int i = 0; i < w.size(); i++) { w[i][0] /= denom; } //[numOTUs][1] - w.unit + + //robjects.r('LD <- xy.matrix%*%w.unit') [numSampled][numOtus] * [numOTUs][1] + vector< vector > LD = linear.matrix_mult(linear.transpose(a), w); + + //find means for each groups LDs + vector LDMeans; LDMeans.resize(classes.size(), 0.0); //means[0] -> average for [group0]. + for (int i = 0; i < LD.size(); i++) { LDMeans[quickIndex[randClass[i]]] += LD[i][0]; } + for (int i = 0; i < LDMeans.size(); i++) { LDMeans[i] /= (double) counts[i]; } + + //calculate for each comparisons i.e. with groups A,B,C = AB, AC, BC = 3; + vector< vector > results;// [numComparison][numOTUs] + for (int i = 0; i < LDMeans.size(); i++) { + for (int l = 0; l < i; l++) { + if (m->control_pressed) { return scaling; } + //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))') + double effectSize = abs(LDMeans[i] - LDMeans[l]); + //scal = robjects.r('wfinal <- w.unit * effect.size') + vector compResults; + for (int j = 0; j < w.size(); j++) { //[numOTUs][1] + //coeff = [abs(float(v)) if not math.isnan(float(v)) else 0.0 for v in scal] + double coeff = abs(w[j][0]*effectSize); if (isnan(coeff) || isinf(coeff)) { coeff = 0.0; } + //gm = abs(res[p[0]][j] - res[p[1]][j]) - res is the means for each group for each otu + double gm = abs(means[i][j] - means[l][j]); + //means[k][i].append((gm+coeff[j])*0.5) + compResults.push_back((gm+coeff)*0.5); + } + results.push_back(compResults); + } + } + + return results; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "lda"); + exit(1); + } +} + +//********************************************************************************************************************** +//modelled after lefse.py contast_within_classes_or_few_per_class function +bool LefseCommand::contastWithinClassesOrFewPerClass(vector< vector >& lookup, vector rands, int minCl, map > class2GroupIndex, map indexToClass) { + try { + set cls; + int countFound = 0; + + for (int i = 0; i < rands.size(); i++) { //fill cls with the classes represented in the random selection + for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + if (m->inUsersGroups(rands[i], (it->second))) { + cls.insert(it->first); + countFound++; + } + } + } + + //sanity check + if (rands.size() != countFound) { m->mothurOut("oops, should never get here, missing something.\n"); } + + if (cls.size() < class2GroupIndex.size()) { return true; } //some classes are not present in sampling + + for (set::iterator it = cls.begin(); it != cls.end(); it++) { + if (cls.count(*it) < minCl) { return true; } //this sampling has class count below minimum + } + //for this otu + int numBins = lookup.size(); + for (int i = 0; i < numBins; i++) { + if (m->control_pressed) { break; } + + //break up random sampling by class + map > class2Values; //maps class name -> set of abunds present in random sampling. F003Early -> 0.001, 0.003... + for (int j = 0; j < rands.size(); j++) { + class2Values[indexToClass[rands[j]]].insert(lookup[i][rands[j]]); + //rands[j] = index of randomly selected group in lookup, randIndex2Class[rands[j]] = class this group belongs to. lookup[rands[j]]->getAbundance(i) = abundance of this group for this OTU. + } + //are the unique values less than we want + //if (len(set(col)) <= min_cl and min_cl > 1) or (min_cl == 1 and len(set(col)) <= 1): + for (map >::iterator it = class2Values.begin(); it != class2Values.end(); it++) { + if (((it->second).size() <= minCl && minCl > 1) || (minCl == 1 && (it->second).size() <= 1)) { return true; } + } + } + + return false; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "contastWithinClassesOrFewPerClass"); + exit(1); + } +} +//********************************************************************************************************************** +int LefseCommand::printResults(vector< vector > means, map sigKW, map sigLDA, string label, vector classes) { + try { map variables; variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile)); - variables["[distance]"] = lookup[0]->getLabel(); - string outputFileName = getOutputFileName("wilcoxon",variables); + variables["[distance]"] = label; + string outputFileName = getOutputFileName("summary",variables); + ofstream out; + m->openOutputFile(outputFileName, out); + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); - ofstream out; - m->openOutputFile(outputFileName, out); - outputNames.push_back(outputFileName); outputTypes["wilcoxon"].push_back(outputFileName); - out << "OTULabel\tComparision\tWilcoxon\tPvalue\n"; + //output headers + out << "OTU\tLogMaxMean\tClass\tLDA\tpValue\n"; - LinearAlgebra linear; - for (int i = 0; i < numBins; i++) { + string temp = ""; + for (int i = 0; i < means.size(); i++) { //[numOTUs][classes] + //find max mean of classes + double maxMean = -1.0; string maxClass = "none"; + for (int j = 0; j < means[i].size(); j++) { if (means[i][j] > maxMean) { maxMean = means[i][j]; maxClass = classes[j]; } } + + //str(math.log(max(max(v),1.0),10.0)) + double logMaxMean = 1.0; + if (maxMean > logMaxMean) { logMaxMean = maxMean; } + logMaxMean = log10(logMaxMean); + + out << m->currentSharedBinLabels[i] << '\t' << logMaxMean << '\t'; + if (m->debug) { temp = m->currentSharedBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; } + + map::iterator it = sigLDA.find(i); + if (it != sigLDA.end()) { + out << maxClass << '\t' << it->second << '\t' << sigKW[i] << endl; //sigLDA is a subset of sigKW so no need to look + if (m->debug) { temp += maxClass + '\t' + toString(it->second) + '\t' + toString(sigKW[i]) + '\n'; m->mothurOut(temp); temp = ""; } + }else { out << '-' << endl; } + } + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "printResults"); + exit(1); + } +} +//********************************************************************************************************************** +//printToCoutForRTesting(adjustedLookup, rand_s, class2GroupIndex, numBins); +bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLookup, vector rand_s, map >& class2GroupIndex, map bins, map >& subClass2GroupIndex, vector groups) { + try { + cout << "rand_s = "; + for (int h = 0; h < rand_s.size(); h++) { cout << rand_s[h] << '\t'; } cout << endl; + + //print otu data + int count = 0; + for (map::iterator it = bins.begin(); it != bins.end(); it++) { + if (m->control_pressed) { break; } + + cout << m->currentSharedBinLabels[it->first] << " <- c("; + for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; } + cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n"; + count++; + } + /* + string tempOutput = ""; + for (int h = 0; h < rand_s.size(); h++) { + //find class this index is in + for (map >::iterator it = class2GroupIndex.begin(); it!= class2GroupIndex.end(); it++) { + if (m->inUsersGroups(rand_s[h], (it->second)) ) { cout << (h+1) << " <- c(\"" +it->first + "\")\n" ; } + } + }*/ + + + string tempOutput = "treatments <- c("; + for (int h = 0; h < rand_s.size(); h++) { + //find class this index is in + for (map >::iterator it = class2GroupIndex.begin(); it!= class2GroupIndex.end(); it++) { + if (m->inUsersGroups(rand_s[h], (it->second)) ) { tempOutput += "\"" +it->first + "\"" + ","; } //"\"" +it->first + "\"" + } + } + tempOutput = tempOutput.substr(0, tempOutput.length()-1); + tempOutput += ")\n"; + cout << tempOutput; + + /* + if (subclass != "") { + string tempOutput = "sub <- c("; + for (int h = 0; h < rand_s.size(); h++) { + //find class this index is in + for (map >::iterator it = subClass2GroupIndex.begin(); it!= subClass2GroupIndex.end(); it++) { + if (m->inUsersGroups(rand_s[h], (it->second)) ) { tempOutput += "\"" +it->first + "\"" + ','; } + } + } + tempOutput = tempOutput.substr(0, tempOutput.length()-1); + tempOutput += ")\n"; + cout << tempOutput; + } + + if (subject) { + string tempOutput = "group <- c("; + for (int h = 0; h < groups.size(); h++) { + tempOutput += "\"" +groups[h] + "\"" + ','; + } + tempOutput = tempOutput.substr(0, tempOutput.length()-1); + tempOutput += ")\n"; + cout << tempOutput; + }*/ + + + //print data frame + tempOutput = "dat <- data.frame("; + for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - if (m->inUsersGroups(i, bins)) { //flagged in Kruskal Wallis + tempOutput += "\"" + m->currentSharedBinLabels[it->first] + "\"=" + m->currentSharedBinLabels[it->first] + ","; + } + //tempOutput = tempOutput.substr(0, tempOutput.length()-1); + tempOutput += " class=treatments"; + //if (subclass != "") { tempOutput += ", subclass=sub"; } + //if (subject) { tempOutput += ", subject=group"; } + tempOutput += ")\n"; + cout << tempOutput; - bool sig = false; - //for each subclass comparision - for (int j = 0; j < numComp; j++) { - //fill x and y with this comparisons data - vector x; vector y; - - //fill x and y - vector xIndexes = subClass2GroupIndex[comp[j].group1]; //indexes in lookup for this subclass - for (int k = 0; k < xIndexes.size(); k++) { x.push_back(lookup[xIndexes[k]]->getAbundance(i)); } - - vector yIndexes = subClass2GroupIndex[comp[j].group2]; //indexes in lookup for this subclass - for (int k = 0; k < yIndexes.size(); k++) { y.push_back(lookup[yIndexes[k]]->getAbundance(i)); } - - double pValue = 0.0; - double H = linear.calcWilcoxon(x, y, pValue); + tempOutput = "z <- suppressWarnings(mylda(as.formula(class ~ "; + for (map::iterator it = bins.begin(); it != bins.end(); it++) { + if (m->control_pressed) { break; } - //output H and signifigance - out << m->currentBinLabels[i] << '\t' << comp[j].getCombo() << '\t' << H << '\t' << pValue << endl; - - //set sig - not sure how yet - } - if (sig) { significantOtuLabels.push_back(i); } + tempOutput += m->currentSharedBinLabels[it->first] + "+"; + } + tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign + tempOutput += "), data = dat, tol = 1e-10))"; + cout << tempOutput + "\nz\n"; + cout << "w <- z$scaling[,1]\n"; //robjects.r('w <- z$scaling[,1]') + cout << "w.unit <- w/sqrt(sum(w^2))\n"; //robjects.r('w.unit <- w/sqrt(sum(w^2))') + cout << "ss <- dat[,-match(\"class\",colnames(dat))]\n"; //robjects.r('ss <- sub_d[,-match("class",colnames(sub_d))]') + //if (subclass != "") { cout << "ss <- ss[,-match(\"subclass\",colnames(ss))]\n"; }//robjects.r('ss <- ss[,-match("subclass",colnames(ss))]') + //if (subject) { cout << "ss <- ss[,-match(\"subject\",colnames(ss))]\n"; }//robjects.r('ss <- ss[,-match("subject",colnames(ss))]') + cout << "xy.matrix <- as.matrix(ss)\n"; //robjects.r('xy.matrix <- as.matrix(ss)') + cout << "LD <- xy.matrix%*%w.unit\n"; //robjects.r('LD <- xy.matrix%*%w.unit') + cout << "effect.size <- abs(mean(LD[dat[,\"class\"]==\"'+p[0]+'\"]) - mean(LD[dat[,\"class\"]==\"'+p[1]+'\"]))\n"; //robjects.r('effect.size <- abs(mean(LD[sub_d[,"class"]=="'+p[0]+'"]) - mean(LD[sub_d[,"class"]=="'+p[1]+'"]))') + cout << "wfinal <- w.unit * effect.size\n"; //scal = robjects.r('wfinal <- w.unit * effect.size') + cout << "mm <- z$means\n"; //rres = robjects.r('mm <- z$means') + + return true; + } + catch(exception& e) { + m->errorOut(e, "LefseCommand", "printToCoutForRTesting"); + exit(1); + } +} +//********************************************************************************************************************** +int LefseCommand::makeShared(int numDesignLines) { + try { + ifstream in; + m->openInputFile(sharedfile, in); + vector< vector > lines; + for(int i = 0; i < numDesignLines; i++) { + if (m->control_pressed) { return 0; } + + string line = m->getline(in); + cout << line << endl; + vector pieces = m->splitWhiteSpace(line); + lines.push_back(pieces); + } + + ofstream out; + m->openOutputFile(sharedfile+".design", out); out << "group" << '\t'; + for (int j = 0; j < lines.size(); j++) { out << lines[j][0] << '\t'; } out << endl; + for (int j = 1; j < lines[0].size(); j++) { + out <<(j-1) << '\t'; + for (int i = 0; i < lines.size(); i++) { + out << lines[i][j] << '\t'; } + out << endl; } out.close(); + DesignMap design(sharedfile+".design"); - return significantOtuLabels; + vector lookup; + for (int k = 0; k < lines[0].size()-1; k++) { + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel("0.03"); + temp->setGroup(toString(k)); + lookup.push_back(temp); + } + + m->currentSharedBinLabels.clear(); + int count = 0; + while (!in.eof()) { + if (m->control_pressed) { return 0; } + + string line = m->getline(in); + vector pieces = m->splitWhiteSpace(line); + + float sum = 0.0; + for (int i = 1; i < pieces.size(); i++) { + float value; m->mothurConvert(pieces[i], value); + sum += value; + } + + if (sum != 0.0) { + //cout << count << '\t'; + for (int i = 1; i < pieces.size(); i++) { + float value; m->mothurConvert(pieces[i], value); + lookup[i-1]->push_back(value, toString(i-1)); + //cout << pieces[i] << '\t'; + } + m->currentSharedBinLabels.push_back(toString(count)); + //m->currentBinLabels.push_back(pieces[0]); + //cout << line<< endl; + //cout << endl; + } + count++; + } + in.close(); + + for (int k = 0; k < lookup.size(); k++) { + //cout << "0.03" << '\t' << toString(k) << endl; lookup[k]->print(cout); + } + + process(lookup, design); + + return 0; } catch(exception& e) { - m->errorOut(e, "LefseCommand", "runWilcoxon"); + m->errorOut(e, "LefseCommand", "printToCoutForRTesting"); exit(1); } }