X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=lefsecommand.cpp;h=0403c910705313432fa7320150326927d03c783b;hp=3575e021fd754b5d4baf897893a0633372aef69f;hb=499f4ac6e321f9f03d4c3aa25c3b6880892c8b83;hpb=3a5dd9e428ab93a6dcdce7912e8ebb977be0b893 diff --git a/lefsecommand.cpp b/lefsecommand.cpp index 3575e02..0403c91 100644 --- a/lefsecommand.cpp +++ b/lefsecommand.cpp @@ -17,7 +17,7 @@ vector LefseCommand::setParameters(){ CommandParameter pclass("class", "String", "", "", "", "", "","",false,false); parameters.push_back(pclass); CommandParameter psubclass("subclass", "String", "", "", "", "", "","",false,false); parameters.push_back(psubclass); CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); - CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); + //CommandParameter pclasses("classes", "String", "", "", "", "", "","",false,false); parameters.push_back(pclasses); CommandParameter palpha("aalpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(palpha); CommandParameter pwalpha("walpha", "Number", "", "0.05", "", "", "","",false,false); parameters.push_back(pwalpha); @@ -58,7 +58,7 @@ string LefseCommand::getHelpString(){ try { string helpString = ""; helpString += "The lefse command allows you to ....\n"; - helpString += "The lefse command parameters are: shared, design, class, subclass, label, classes, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; + helpString += "The lefse command parameters are: shared, design, class, subclass, label, walpha, aalpha, lda, wilc, iters, curv, fboots, strict, minc, multiclass and norm.\n"; helpString += "The class parameter is used to indicate the which category you would like used for the Kruskal Wallis analysis. If none is provided first category is used.\n"; helpString += "The subclass parameter is used to indicate the .....If none is provided, second category is used, or if only one category subclass is ignored. \n"; helpString += "The aalpha parameter is used to set the alpha value for the Krukal Wallis Anova test Default=0.05. \n"; @@ -67,13 +67,13 @@ string LefseCommand::getHelpString(){ helpString += "The wilc parameter is used to indicate whether to perform the Wilcoxon test. Default=T. \n"; helpString += "The iters parameter is used to set the number of bootstrap iteration for LDA. Default=30. \n"; //helpString += "The wilcsamename parameter is used to indicate whether perform the wilcoxon test only among the subclasses with the same name. Default=F. \n"; - helpString += "The curv parameter is used to set whether perform the wilcoxon test ing the Curtis's approach [BETA VERSION] Default=F. \n"; + helpString += "The curv parameter is used to set whether perform the wilcoxon testing the Curtis's approach [BETA VERSION] Default=F. \n"; helpString += "The norm parameter is used to multiply relative abundances by 1000000. Recommended when very low values are present. Default=T. \n"; helpString += "The fboots parameter is used to set the subsampling fraction value for each bootstrap iteration. Default=0.67. \n"; helpString += "The strict parameter is used to set the multiple testing correction options. 0 no correction (more strict, default), 1 correction for independent comparisons, 2 correction for independent comparison. Options = 0,1,2. Default=0. \n"; helpString += "The minc parameter is used to minimum number of samples per subclass for performing wilcoxon test. Default=10. \n"; helpString += "The multiclass parameter is used to (for multiclass tasks) set whether the test is performed in a one-against-one ( onevone - more strict!) or in a one-against-all setting ( onevall - less strict). Default=onevall. \n"; - helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; + //helpString += "The classes parameter is used to indicate the classes you would like to use. Classes should be inputted in the following format: classes=label-label. For example to include groups from treatment with value early or late and age= young or old. class=treatment-age.\n"; helpString += "The label parameter is used to indicate which distances in the shared file you would like to use. labels are separated by dashes.\n"; helpString += "The lefse command should be in the following format: lefse(shared=final.an.shared, design=final.design, class=treatment, subclass=age).\n"; return helpString; @@ -199,9 +199,6 @@ LefseCommand::LefseCommand(string option) { subclass = validParameter.validFile(parameters, "subclass", false); if (subclass == "not found") { subclass = mclass; } - classes = validParameter.validFile(parameters, "classes", false); - if (classes == "not found") { classes = ""; } - string temp = validParameter.validFile(parameters, "aalpha", false); if (temp == "not found") { temp = "0.05"; } m->mothurConvert(temp, anovaAlpha); @@ -266,22 +263,16 @@ LefseCommand::LefseCommand(string option) { int LefseCommand::execute(){ try { + srand(1982); //for reading lefse formatted file and running in mothur for testing - pass number of rows used for design file if (false) { makeShared(1); exit(1); } if (abort == true) { if (calledHelp) { return 0; } return 2; } DesignMap designMap(designfile); - //if user set classes set groups=those classes - if (classes != "") { - map > thisClasses = m->parseClasses(classes); - vector groups = designMap.getNamesUnique(thisClasses); - if (groups.size() != 0) { m->setGroups(groups); } - else { m->mothurOut("[ERROR]: no groups meet your classes requirement, quitting.\n"); return 0; } - } //if user did not select class use first column - if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); } + if (mclass == "") { mclass = designMap.getDefaultClass(); m->mothurOut("\nYou did not provide a class, using " + mclass +".\n\n"); if (subclass == "") { subclass = mclass; } } InputData input(sharedfile, "sharedfile"); vector lookup = input.getSharedRAbundFloatVectors(); @@ -365,6 +356,7 @@ int LefseCommand::execute(){ m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } m->mothurOutEndLine(); + srand(time(NULL)); return 0; } @@ -383,8 +375,8 @@ int LefseCommand::process(vector& lookup, DesignMap& d map > class2SubClasses; //maps class name to vector of its subclasses map > subClass2GroupIndex; //maps subclass name to vector of indexes in lookup from that subclass. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. map > class2GroupIndex; //maps subclass name to vector of indexes in lookup from that class. old -> 1,2,3 means groups in location 1,2,3 of lookup are from old. Saves time below. + if (normMillion) { normalize(lookup); } for (int j = 0; j < lookup.size(); j++) { - if (normMillion) { normalize(lookup); } string group = lookup[j]->getGroup(); string treatment = designMap.get(group, mclass); //get value for this group in this category string thisSub = designMap.get(group, subclass); @@ -426,12 +418,16 @@ int LefseCommand::process(vector& lookup, DesignMap& d int numSigBeforeWilcox = significantOtuLabels.size(); + if (m->debug) { m->mothurOut("[DEBUG]: completed Kruskal Wallis\n"); } + //check for subclass string wilcoxString = ""; if ((subclass != "") && wilc) { significantOtuLabels = runWilcoxon(lookup, designMap, significantOtuLabels, class2SubClasses, subClass2GroupIndex, subclass2Class); wilcoxString += " ( " + toString(numSigBeforeWilcox) + " ) before internal wilcoxon"; } int numSigAfterWilcox = significantOtuLabels.size(); + if (m->debug) { m->mothurOut("[DEBUG]: completed Wilcoxon\n"); } + m->mothurOut("\nNumber of significantly discriminative features: " + toString(numSigAfterWilcox) + wilcoxString + ".\n"); map sigOTUSLDA; @@ -441,6 +437,8 @@ int LefseCommand::process(vector& lookup, DesignMap& d } else { m->mothurOut("No features with significant differences between the classes.\n"); } + if (m->debug) { m->mothurOut("[DEBUG]: completed lda\n"); } + printResults(means, significantOtuLabels, sigOTUSLDA, lookup[0]->getLabel(), classes); return 0; @@ -461,7 +459,9 @@ int LefseCommand::normalize(vector& lookup) { } for (int i = 0; i < lookup.size(); i++) { - for (int j = 0; j < lookup[i]->getNumBins(); j++) { lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); } + for (int j = 0; j < lookup[i]->getNumBins(); j++) { + lookup[i]->set(j, lookup[i]->getAbundance(j)*mul[i], lookup[i]->getGroup()); + } } return 0; @@ -713,15 +713,22 @@ map LefseCommand::testLDA(vector& lookup, for (int i = 0; i < numBins; i++) { if (m->control_pressed) { break; } + if (m->debug) { m->mothurOut("[DEBUG]: bin = " + toString(i) + "\n."); } + it = bins.find(i); if (it != bins.end()) { //flagged in Kruskal Wallis and Wilcoxon(if we ran it) + if (m->debug) { m->mothurOut("[DEBUG]:flagged bin = " + toString(i) + "\n."); } + //fill x with this OTUs abundances vector x; for (int j = 0; j < lookup.size(); j++) { x.push_back(lookup[j]->getAbundance(i)); } //go through classes for (map >::iterator it = class2GroupIndex.begin(); it != class2GroupIndex.end(); it++) { + + if (m->debug) { m->mothurOut("[DEBUG]: class = " + it->first + "\n."); } + //max(float(feats['class'].count(c))*0.5,4) //max(numGroups in this class*0.5, 4.0) double necessaryNum = ((double)((it->second).size())*0.5); @@ -760,10 +767,14 @@ map LefseCommand::testLDA(vector& lookup, minCl = (int)((float)(minCl*fBoots*fBoots*0.05)); minCl = max(minCl, 1); + if (m->debug) { m->mothurOut("[DEBUG]: about to start iters. \n."); } + vector< vector< vector > > results;//[iters][numComparison][numOTUs] for (int j = 0; j < iters; j++) { if (m->control_pressed) { return sigOTUS; } + if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(j) + "\n."); } + //find "good" random vector vector rand_s; for (int h = 0; h < 1000; h++) { //generate a vector of length fractionNumGroups with range 0 to numGroups-1 @@ -998,8 +1009,8 @@ int LefseCommand::printResults(vector< vector > means, map if (maxMean > logMaxMean) { logMaxMean = maxMean; } logMaxMean = log10(logMaxMean); - out << m->currentBinLabels[i] << '\t' << logMaxMean << '\t'; - if (m->debug) { temp = m->currentBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; } + out << m->currentSharedBinLabels[i] << '\t' << logMaxMean << '\t'; + if (m->debug) { temp = m->currentSharedBinLabels[i] + '\t' + toString(logMaxMean) + '\t'; } map::iterator it = sigLDA.find(i); if (it != sigLDA.end()) { @@ -1029,7 +1040,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - cout << m->currentBinLabels[it->first] << " <- c("; + cout << m->currentSharedBinLabels[it->first] << " <- c("; for (int h = 0; h < rand_s.size()-1; h++) { cout << (adjustedLookup[count][rand_s[h]]) << ", "; } cout << (adjustedLookup[count][rand_s[rand_s.size()-1]]) << ")\n"; count++; @@ -1085,7 +1096,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - tempOutput += "\"" + m->currentBinLabels[it->first] + "\"=" + m->currentBinLabels[it->first] + ","; + tempOutput += "\"" + m->currentSharedBinLabels[it->first] + "\"=" + m->currentSharedBinLabels[it->first] + ","; } //tempOutput = tempOutput.substr(0, tempOutput.length()-1); tempOutput += " class=treatments"; @@ -1098,7 +1109,7 @@ bool LefseCommand::printToCoutForRTesting(vector< vector >& adjustedLook for (map::iterator it = bins.begin(); it != bins.end(); it++) { if (m->control_pressed) { break; } - tempOutput += m->currentBinLabels[it->first] + "+"; + tempOutput += m->currentSharedBinLabels[it->first] + "+"; } tempOutput = tempOutput.substr(0, tempOutput.length()-1); //rip off extra plus sign tempOutput += "), data = dat, tol = 1e-10))"; @@ -1157,7 +1168,7 @@ int LefseCommand::makeShared(int numDesignLines) { lookup.push_back(temp); } - m->currentBinLabels.clear(); + m->currentSharedBinLabels.clear(); int count = 0; while (!in.eof()) { if (m->control_pressed) { return 0; } @@ -1178,7 +1189,7 @@ int LefseCommand::makeShared(int numDesignLines) { lookup[i-1]->push_back(value, toString(i-1)); //cout << pieces[i] << '\t'; } - m->currentBinLabels.push_back(toString(count)); + m->currentSharedBinLabels.push_back(toString(count)); //m->currentBinLabels.push_back(pieces[0]); //cout << line<< endl; //cout << endl;