From cd7040a22ae19c86a13c2c10ed90a64b77a0c482 Mon Sep 17 00:00:00 2001 From: westcott Date: Tue, 28 Jun 2011 10:20:10 +0000 Subject: [PATCH] indicator command --- Mothur.xcodeproj/project.pbxproj | 4 +- chimerauchimecommand.cpp | 18 +- indicatorcommand.cpp | 501 +++++++++++++++++++++++++------ indicatorcommand.h | 10 +- mothurout.cpp | 15 + mothurout.h | 1 + myutils.cpp | 1 + 7 files changed, 437 insertions(+), 113 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index d3030bb..e920d62 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1431,8 +1431,6 @@ A7E9B6D712D37EC400DA6239 /* efron.cpp */, A7E9B6D812D37EC400DA6239 /* efron.h */, A7E9B6E212D37EC400DA6239 /* filters.h */, - A7E9B6E512D37EC400DA6239 /* fisher2.c */, - A7E9B6E612D37EC400DA6239 /* fisher2.h */, A7E9B6F012D37EC400DA6239 /* geom.cpp */, A7E9B6F112D37EC400DA6239 /* geom.h */, A7E9B70E12D37EC400DA6239 /* goodscoverage.cpp */, @@ -1731,6 +1729,8 @@ A7E9BA5612D39BD800DA6239 /* metastats */ = { isa = PBXGroup; children = ( + A7E9B6E512D37EC400DA6239 /* fisher2.c */, + A7E9B6E612D37EC400DA6239 /* fisher2.h */, A7E9B75512D37EC400DA6239 /* metastats.h */, A7E9B75612D37EC400DA6239 /* metastats2.c */, ); diff --git a/chimerauchimecommand.cpp b/chimerauchimecommand.cpp index 173675e..05ddaf6 100644 --- a/chimerauchimecommand.cpp +++ b/chimerauchimecommand.cpp @@ -32,7 +32,7 @@ vector ChimeraUchimeCommand::setParameters(){ CommandParameter pchunks("chunks", "Number", "", "4", "", "", "",false,false); parameters.push_back(pchunks); CommandParameter pminchunk("minchunk", "Number", "", "64", "", "", "",false,false); parameters.push_back(pminchunk); CommandParameter pidsmoothwindow("idsmoothwindow", "Number", "", "32", "", "", "",false,false); parameters.push_back(pidsmoothwindow); - CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); + //CommandParameter pminsmoothid("minsmoothid", "Number", "", "0.95", "", "", "",false,false); parameters.push_back(pminsmoothid); CommandParameter pmaxp("maxp", "Number", "", "2", "", "", "",false,false); parameters.push_back(pmaxp); CommandParameter pskipgaps("skipgaps", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps); CommandParameter pskipgaps2("skipgaps2", "Boolean", "", "T", "", "", "",false,false); parameters.push_back(pskipgaps2); @@ -72,7 +72,7 @@ string ChimeraUchimeCommand::getHelpString(){ helpString += "The chunks parameter is the number of chunks to extract from the query sequence when searching for parents. Default 4.\n"; helpString += "The minchunk parameter is the minimum length of a chunk. Default 64.\n"; helpString += "The idsmoothwindow parameter is the length of id smoothing window. Default 32.\n"; - helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n"; + //helpString += "The minsmoothid parameter - minimum factional identity over smoothed window of candidate parent. Default 0.95.\n"; helpString += "The maxp parameter - maximum number of candidate parents to consider. Default 2. In tests so far, increasing maxp gives only a very small improvement in sensivity but tends to increase the error rate quite a bit.\n"; helpString += "The skipgaps parameter controls how gapped columns affect counting of diffs. If skipgaps is set to T, columns containing gaps do not found as diffs. Default = T.\n"; helpString += "The skipgaps2 parameter controls how gapped columns affect counting of diffs. If skipgaps2 is set to T, if column is immediately adjacent to a column containing a gap, it is not counted as a diff. Default = T.\n"; @@ -337,7 +337,7 @@ ChimeraUchimeCommand::ChimeraUchimeCommand(string option) { chunks = validParameter.validFile(parameters, "chunks", false); if (chunks == "not found") { useChunks = false; chunks = "4"; } else{ useChunks = true; } minchunk = validParameter.validFile(parameters, "minchunk", false); if (minchunk == "not found") { useMinchunk = false; minchunk = "64"; } else{ useMinchunk = true; } idsmoothwindow = validParameter.validFile(parameters, "idsmoothwindow", false); if (idsmoothwindow == "not found") { useIdsmoothwindow = false; idsmoothwindow = "32"; } else{ useIdsmoothwindow = true; } - minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; } + //minsmoothid = validParameter.validFile(parameters, "minsmoothid", false); if (minsmoothid == "not found") { useMinsmoothid = false; minsmoothid = "0.95"; } else{ useMinsmoothid = true; } maxp = validParameter.validFile(parameters, "maxp", false); if (maxp == "not found") { useMaxp = false; maxp = "2"; } else{ useMaxp = true; } minlen = validParameter.validFile(parameters, "minlen", false); if (minlen == "not found") { useMinlen = false; minlen = "10"; } else{ useMinlen = true; } maxlen = validParameter.validFile(parameters, "maxlen", false); if (maxlen == "not found") { useMaxlen = false; maxlen = "10000"; } else{ useMaxlen = true; } @@ -650,7 +650,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc cPara.push_back(tempIdsmoothwindow); } - if (useMinsmoothid) { + /*if (useMinsmoothid) { char* tempminsmoothid = new char[14]; //strcpy(tempminsmoothid, "--minsmoothid"); *tempminsmoothid = '\0'; strncat(tempminsmoothid, "--minsmoothid", 13); @@ -659,7 +659,7 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc *tempMinsmoothid = '\0'; strncat(tempMinsmoothid, minsmoothid.c_str(), minsmoothid.length()); //strcpy(tempMinsmoothid, minsmoothid.c_str()); cPara.push_back(tempMinsmoothid); - } + }*/ if (useMaxp) { char* tempmaxp = new char[7]; @@ -673,16 +673,16 @@ int ChimeraUchimeCommand::driver(string outputFName, string filename, string acc } if (!skipgaps) { - char* tempskipgaps = new char[15]; + char* tempskipgaps = new char[13]; //strcpy(tempskipgaps, "--[no]skipgaps"); - *tempskipgaps = '\0'; strncat(tempskipgaps, "--[no]skipgaps", 14); + *tempskipgaps = '\0'; strncat(tempskipgaps, "--noskipgaps", 12); cPara.push_back(tempskipgaps); } if (!skipgaps2) { - char* tempskipgaps2 = new char[16]; + char* tempskipgaps2 = new char[14]; //strcpy(tempskipgaps2, "--[no]skipgaps2"); - *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--[no]skipgaps2", 15); + *tempskipgaps2 = '\0'; strncat(tempskipgaps2, "--noskipgaps2", 13); cPara.push_back(tempskipgaps2); } diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 966810b..b06742f 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -14,12 +14,13 @@ //********************************************************************************************************************** vector IndicatorCommand::setParameters(){ try { - CommandParameter pdesign("design", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pdesign); + CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); + CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(pdesign); CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared); CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund); CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter ptree("tree", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(ptree); + CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(ptree); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -36,13 +37,14 @@ vector IndicatorCommand::setParameters(){ string IndicatorCommand::getHelpString(){ try { string helpString = ""; - helpString += "The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n"; + helpString += "The indicator command reads a shared or relabund file and a tree or design file, and outputs a .indicator.tre and .indicator.summary file. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The summary file lists the indicator value for each OTU for each node.\n"; helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n"; helpString += "The design parameter allows you to provide a design file to relate the tree to the shared or relabund file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n"; helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n"; + helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000."; helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"; helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; return helpString; @@ -142,12 +144,9 @@ IndicatorCommand::IndicatorCommand(string option) { //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); - if (treefile == "not open") { abort = true; } - else if (treefile == "not found") { //if there is a current design file, use it - treefile = m->getTreeFile(); - if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } - else { m->mothurOut("You have no current tree file and the tree parameter is required."); m->mothurOutEndLine(); abort = true; } - }else { m->setTreeFile(treefile); } + if (treefile == "not open") { treefile = ""; abort = true; } + else if (treefile == "not found") { treefile = ""; } + else { m->setTreeFile(treefile); } sharedfile = validParameter.validFile(parameters, "shared", true); if (sharedfile == "not open") { abort = true; } @@ -160,7 +159,7 @@ IndicatorCommand::IndicatorCommand(string option) { else { inputFileName = relabundfile; m->setRelAbundFile(relabundfile); } designfile = validParameter.validFile(parameters, "design", true); - if (designfile == "not open") { abort = true; } + if (designfile == "not open") { designfile = ""; abort = true; } else if (designfile == "not found") { designfile = ""; } else { m->setDesignFile(designfile); } @@ -172,6 +171,9 @@ IndicatorCommand::IndicatorCommand(string option) { label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } + string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } + convert(temp, iters); + if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? //give priority to shared, then relabund @@ -187,7 +189,20 @@ IndicatorCommand::IndicatorCommand(string option) { } } - if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; } + + if ((designfile == "") && (treefile == "")) { + treefile = m->getTreeFile(); + if (treefile != "") { m->mothurOut("Using " + treefile + " as input file for the tree parameter."); m->mothurOutEndLine(); } + else { + designfile = m->getDesignFile(); + if (designfile != "") { m->mothurOut("Using " + designfile + " as input file for the design parameter."); m->mothurOutEndLine(); } + else { + m->mothurOut("[ERROR]: You must provide either a tree or design file."); m->mothurOutEndLine(); abort = true; + } + } + } + + if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; } } } @@ -202,6 +217,8 @@ int IndicatorCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } + + cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); //read designfile if given and set up globaldatas groups for read of sharedfiles if (designfile != "") { @@ -236,101 +253,107 @@ int IndicatorCommand::execute(){ /***************************************************/ // reading tree info // /***************************************************/ - string groupfile = ""; - m->setTreeFile(treefile); - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - treeMap = new TreeMap(); - bool mismatch = false; - - for (int i = 0; i < m->Treenames.size(); i++) { - //sanity check - is this a group that is not in the sharedfile? - if (designfile == "") { - if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) { - m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); - mismatch = true; - } - treeMap->addSeq(m->Treenames[i], "Group1"); - }else{ - vector myGroups; myGroups.push_back(m->Treenames[i]); - vector myNames = designMap->getNamesSeqs(myGroups); - - for(int k = 0; k < myNames.size(); k++) { - if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) { - m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + if (treefile != "") { + string groupfile = ""; + m->setTreeFile(treefile); + Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap + treeMap = new TreeMap(); + bool mismatch = false; + + for (int i = 0; i < m->Treenames.size(); i++) { + //sanity check - is this a group that is not in the sharedfile? + if (designfile == "") { + if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) { + m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); mismatch = true; } + treeMap->addSeq(m->Treenames[i], "Group1"); + }else{ + vector myGroups; myGroups.push_back(m->Treenames[i]); + vector myNames = designMap->getNamesSeqs(myGroups); + + for(int k = 0; k < myNames.size(); k++) { + if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) { + m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); + mismatch = true; + } + } + treeMap->addSeq(m->Treenames[i], "Group1"); } - treeMap->addSeq(m->Treenames[i], "Group1"); } - } - - if ((designfile != "") && (m->Treenames.size() != Groups.size())) { m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } - - if (mismatch) { //cleanup and exit - if (designfile != "") { delete designMap; } - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete treeMap; - return 0; - } - - read = new ReadNewickTree(treefile); - int readOk = read->read(treeMap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; } - - vector T = read->getTrees(); - - delete read; - - if (m->control_pressed) { - if (designfile != "") { delete designMap; } - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0; - } - T[0]->assembleTree(); + if ((designfile != "") && (m->Treenames.size() != Groups.size())) { m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } + + if (mismatch) { //cleanup and exit + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + delete treeMap; + return 0; + } + + read = new ReadNewickTree(treefile); + int readOk = read->read(treeMap); + + if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; } + + vector T = read->getTrees(); + + delete read; + + if (m->control_pressed) { + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0; + } - /***************************************************/ - // create ouptut tree - respecting pickedGroups // - /***************************************************/ - Tree* outputTree = new Tree(m->Groups.size(), treeMap); - - outputTree->getSubTree(T[0], m->Groups); - outputTree->assembleTree(); + T[0]->assembleTree(); + + /***************************************************/ + // create ouptut tree - respecting pickedGroups // + /***************************************************/ + Tree* outputTree = new Tree(m->Groups.size(), treeMap); - //no longer need original tree, we have output tree to use and label - for (int i = 0; i < T.size(); i++) { delete T[i]; } - + outputTree->getSubTree(T[0], m->Groups); + outputTree->assembleTree(); - if (m->control_pressed) { - if (designfile != "") { delete designMap; } - if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete outputTree; delete treeMap; return 0; + //no longer need original tree, we have output tree to use and label + for (int i = 0; i < T.size(); i++) { delete T[i]; } + + + if (m->control_pressed) { + if (designfile != "") { delete designMap; } + if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } + else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } + delete outputTree; delete treeMap; return 0; + } + + /***************************************************/ + // get indicator species values // + /***************************************************/ + GetIndicatorSpecies(outputTree); + delete outputTree; delete treeMap; + + }else { //run with design file only + //get indicator species + GetIndicatorSpecies(); } - /***************************************************/ - // get indicator species values // - /***************************************************/ - GetIndicatorSpecies(outputTree); - if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete outputTree; delete treeMap; - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } //set tree file as new current treefile - string current = ""; - itTypes = outputTypes.find("tree"); - if (itTypes != outputTypes.end()) { - if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); } + if (treefile != "") { + string current = ""; + itTypes = outputTypes.find("tree"); + if (itTypes != outputTypes.end()) { + if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); } + } } - m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -344,6 +367,147 @@ int IndicatorCommand::execute(){ } } //********************************************************************************************************************** +//divide shared or relabund file by groupings in the design file +//report all otu values to file +int IndicatorCommand::GetIndicatorSpecies(){ + try { + string thisOutputDir = outputDir; + if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } + string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; + outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); + + ofstream out; + m->openOutputFile(outputFileName, out); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + + int numBins = 0; + if (sharedfile != "") { numBins = lookup[0]->getNumBins(); } + else { numBins = lookupFloat[0]->getNumBins(); } + + if (m->control_pressed) { out.close(); return 0; } + + /*****************************************************/ + //create vectors containing rabund info // + /*****************************************************/ + + vector indicatorValues; //size of numBins + vector pValues; + map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. + + if (sharedfile != "") { + vector< vector > groupings; + set groupsAlreadyAdded; + vector subset; + + //for each grouping + for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + + for (int k = 0; k < lookup.size(); k++) { + //are you from this grouping? + if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) { + subset.push_back(lookup[k]); + groupsAlreadyAdded.insert(lookup[k]->getGroup()); + } + } + if (subset.size() != 0) { groupings.push_back(subset); } + subset.clear(); + } + + if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + + }else { + vector< vector > groupings; + set groupsAlreadyAdded; + vector subset; + + //for each grouping + for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int k = 0; k < lookupFloat.size(); k++) { + //are you from this grouping? + if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) { + subset.push_back(lookupFloat[k]); + groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); + } + } + if (subset.size() != 0) { groupings.push_back(subset); } + subset.clear(); + } + + if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookupFloat.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + } + + if (m->control_pressed) { out.close(); return 0; } + + + /******************************************************/ + //output indicator values to table form // + /*****************************************************/ + int indicatorOTU; + double maxValue, pvalue; maxValue=0.0; pvalue=0.0; + out << "OTU\tIndicator_Value\tpValue" << endl; + for (int j = 0; j < indicatorValues.size(); j++) { + + if (m->control_pressed) { out.close(); return 0; } + + out << (j+1) << '\t' << indicatorValues[j] << '\t'; + + if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } + else { out << "<" << (1/(float)iters) << endl; } + + if (maxValue < indicatorValues[j]) { + maxValue = indicatorValues[j]; + indicatorOTU = j; + } + } + + m->mothurOutEndLine(); m->mothurOut("Species\tIndicatorValue\tpValue\n"); + cout << "OTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog("OTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); + m->mothurOutEndLine(); + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "GetIndicatorSpecies"); + exit(1); + } +} +//********************************************************************************************************************** //traverse tree finding indicator species values for each otu at each node //label node with otu number that has highest indicator value //report all otu values to file @@ -365,9 +529,11 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //print headings out << "TreeNode\t"; - for (int i = 0; i < numBins; i++) { out << "OTU-" << (i+1) << '\t'; } + for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; } out << endl; + m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n"); + string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre"; @@ -396,6 +562,8 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ /*****************************************************/ vector indicatorValues; //size of numBins + vector pValues; + map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { vector< vector > groupings; @@ -446,7 +614,20 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings); + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } }else { vector< vector > groupings; @@ -494,7 +675,21 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings); + indicatorValues = getValues(groupings, randomGroupingsMap); + + pValues.resize(indicatorValues.size(), 0); + for(int i=0;icontrol_pressed) { break; } + randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); + vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } + } + } + + for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } } if (m->control_pressed) { out.close(); return 0; } @@ -504,16 +699,35 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //output indicator values to table form + label tree // /*****************************************************/ out << (i+1) << '\t'; + int indicatorOTU; + double maxValue, pvalue; maxValue=0.0; pvalue=0.0; for (int j = 0; j < indicatorValues.size(); j++) { if (m->control_pressed) { out.close(); return 0; } - out << indicatorValues[j] << '\t'; + if (pValues[j] < (1/(float)iters)) { + out << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t'; + }else { + out << indicatorValues[j] << '\t' << pValues[j] << '\t'; + } + + if (maxValue < indicatorValues[j]) { + maxValue = indicatorValues[j]; + indicatorOTU = j; + } } out << endl; T->tree[i].setLabel((i+1)); + cout << i+1 << "\tOTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); + m->mothurOutEndLine(); + + } out.close(); @@ -532,9 +746,10 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } //********************************************************************************************************************** -vector IndicatorCommand::getValues(vector< vector >& groupings){ +vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ try { vector values; + map< vector, vector >::iterator it; //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { @@ -551,8 +766,17 @@ vector IndicatorCommand::getValues(vector< vectorgetAbundance(i); - if (groupings[j][k]->getAbundance(i) != 0) { numNotZero++; } + vector temp; temp.push_back(j); temp.push_back(k); + it = groupingsMap.find(temp); + + if (it == groupingsMap.end()) { //this one didnt get moved + totalAbund += groupings[j][k]->getAbundance(i); + if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + }else { + totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); + if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; } + } + } //mean abundance @@ -586,7 +810,7 @@ vector IndicatorCommand::getValues(vector< vector IndicatorCommand::getValues(vector< vector >& groupings){ +vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ try { vector values; @@ -596,6 +820,8 @@ vector IndicatorCommand::getValues(vector< vector >& cout << groupings[j][k]->getGroup() << endl; } }*/ + map< vector, vector >::iterator it; + //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { vector terms; @@ -607,8 +833,16 @@ vector IndicatorCommand::getValues(vector< vector >& int totalAbund = 0.0; int numNotZero = 0; for (int k = 0; k < groupings[j].size(); k++) { - totalAbund += groupings[j][k]->getAbundance(i); - if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + vector temp; temp.push_back(j); temp.push_back(k); + it = groupingsMap.find(temp); + + if (it == groupingsMap.end()) { //this one didnt get moved + totalAbund += groupings[j][k]->getAbundance(i); + if (groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; } + }else { + totalAbund += groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i); + if (groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; } + } } @@ -901,7 +1135,76 @@ int IndicatorCommand::getSharedFloat(){ m->errorOut(e, "IndicatorCommand", "getShared"); exit(1); } -} +} +//********************************************************************************************************************** +//swap groups between groupings, in essence randomizing the second column of the design file +map< vector, vector > IndicatorCommand::randomizeGroupings(vector< vector >& groupings, int numLookupGroups){ + try { + + map< vector, vector > randomGroupings; + + for (int i = 0; i < numLookupGroups; i++) { + if (m->control_pressed) {break;} + + //get random groups to swap to switch with + //generate random int between 0 and groupings.size()-1 + int z = m->getRandomIndex(groupings.size()-1); + int x = m->getRandomIndex(groupings.size()-1); + int a = m->getRandomIndex(groupings[z].size()-1); + int b = m->getRandomIndex(groupings[x].size()-1); + //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; + //if ((z < 0) || (z > 1) || x<0 || x>1 || a <0 || a>groupings[z].size()-1 || b<0 || b>groupings[x].size()-1) { cout << "probelm" << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; } + + vector from; + vector to; + + from.push_back(z); from.push_back(a); + to.push_back(x); to.push_back(b); + + randomGroupings[from] = to; + } + //cout << "done" << endl; + return randomGroupings; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "randomizeGroupings"); + exit(1); + } +} +//********************************************************************************************************************** +//swap groups between groupings, in essence randomizing the second column of the design file +map< vector, vector > IndicatorCommand::randomizeGroupings(vector< vector >& groupings, int numLookupGroups){ + try { + + map< vector, vector > randomGroupings; + + for (int i = 0; i < numLookupGroups; i++) { + + //get random groups to swap to switch with + //generate random int between 0 and groupings.size()-1 + int z = m->getRandomIndex(groupings.size()-1); + int x = m->getRandomIndex(groupings.size()-1); + int a = m->getRandomIndex(groupings[z].size()-1); + int b = m->getRandomIndex(groupings[x].size()-1); + //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl; + + vector from; + vector to; + + from.push_back(z); from.push_back(a); + to.push_back(x); to.push_back(b); + + randomGroupings[from] = to; + } + + return randomGroupings; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "randomizeGroupings"); + exit(1); + } +} + /*****************************************************************/ diff --git a/indicatorcommand.h b/indicatorcommand.h index 00694be..aa31562 100644 --- a/indicatorcommand.h +++ b/indicatorcommand.h @@ -28,7 +28,7 @@ public: string getCommandCategory() { return "Hypothesis Testing"; } string getHelpString(); string getCitation() { return "Dufrene M, Legendre P (1997). Species assemblages and indicator species: The need for a flexible asymmetrical approach. Ecol Monogr 67: 345-66.\n McCune B, Grace JB, Urban DL (2002). Analysis of ecological communities. MjM Software Design: Gleneden Beach, OR. \nLegendre P, Legendre L (1998). Numerical Ecology. Elsevier: New York. \nhttp://www.mothur.org/wiki/Indicator"; } - string getDescription() { return "calculate the indicator value for each OTU for each tree node"; } + string getDescription() { return "calculate the indicator value for each OTU"; } int execute(); void help() { m->mothurOut(getHelpString()); } @@ -39,6 +39,7 @@ private: GroupMap* designMap; string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir, designfile; bool abort; + int iters; vector outputNames, Groups; vector lookup; vector lookupFloat; @@ -46,10 +47,13 @@ private: int getShared(); int getSharedFloat(); int GetIndicatorSpecies(Tree*&); + int GetIndicatorSpecies(); set getDescendantList(Tree*&, int, map >, map >&); - vector getValues(vector< vector >&); - vector getValues(vector< vector >&); + vector getValues(vector< vector >&, map< vector, vector >); + vector getValues(vector< vector >&, map< vector, vector >); map getDistToRoot(Tree*&); + map< vector, vector > randomizeGroupings(vector< vector >&, int); + map< vector, vector > randomizeGroupings(vector< vector >&, int); }; diff --git a/mothurout.cpp b/mothurout.cpp index a56a621..9896157 100644 --- a/mothurout.cpp +++ b/mothurout.cpp @@ -562,6 +562,21 @@ string MothurOut::getSimpleName(string longName){ /***********************************************************************/ +int MothurOut::getRandomIndex(int highest){ + try { + + int random = (int) ((float)(highest+1) * (float)(rand()) / ((float)RAND_MAX+1.0)); + + return random; + } + catch(exception& e) { + errorOut(e, "MothurOut", "getRandomIndex"); + exit(1); + } + +} +/**********************************************************************/ + string MothurOut::getPathName(string longName){ try { string rootPathName = longName; diff --git a/mothurout.h b/mothurout.h index 98cd6a3..7bbd315 100644 --- a/mothurout.h +++ b/mothurout.h @@ -99,6 +99,7 @@ class MothurOut { float ceilDist(float, int); float roundDist(float, int); unsigned int fromBase36(string); + int getRandomIndex(int); //highest int control_pressed; bool executing, runParse, jumble, gui; diff --git a/myutils.cpp b/myutils.cpp index b9c0147..d0d3817 100755 --- a/myutils.cpp +++ b/myutils.cpp @@ -1531,6 +1531,7 @@ static void GetArgsFromFile(const string &FileName, vector &Args) void MyCmdLine(int argc, char **argv) { + g_Opts.clear(); static unsigned RecurseDepth = 0; ++RecurseDepth; -- 2.39.2