X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=blobdiff_plain;f=indicatorcommand.cpp;h=fd818acb44c4c5280c6a54971d3d39413307861d;hp=b06742fe1c529988457300fc7d3305d98e327f0f;hb=050a3ff02473a3d4c0980964e1a9ebe52e55d6b8;hpb=cd7040a22ae19c86a13c2c10ed90a64b77a0c482 diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index b06742f..fd818ac 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -14,15 +14,16 @@ //********************************************************************************************************************** vector IndicatorCommand::setParameters(){ try { - CommandParameter piters("iters", "Number", "", "1000", "", "", "",false,false); parameters.push_back(piters); - CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(pdesign); - CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(pshared); - CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none",false,false); parameters.push_back(prelabund); - CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); - CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); - CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none",false,false); parameters.push_back(ptree); - CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); - CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + CommandParameter piters("iters", "Number", "", "1000", "", "", "","",false,false); parameters.push_back(piters); + CommandParameter pdesign("design", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","summary",false,false,true); parameters.push_back(pdesign); + CommandParameter pshared("shared", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false,true); parameters.push_back(pshared); + CommandParameter prelabund("relabund", "InputTypes", "", "", "SharedRel", "SharedRel", "none","summary",false,false); parameters.push_back(prelabund); + CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups); + CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel); + CommandParameter ptree("tree", "InputTypes", "", "", "TreeDesign", "TreeDesign", "none","tree-summary",false,false,true); parameters.push_back(ptree); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false); parameters.push_back(pprocessors); vector myArray; for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } @@ -37,13 +38,15 @@ vector IndicatorCommand::setParameters(){ string IndicatorCommand::getHelpString(){ try { string helpString = ""; - helpString += "The indicator command reads a shared or relabund file and a tree or design file, and outputs a .indicator.tre and .indicator.summary file. \n"; + helpString += "The indicator command can be run in 3 ways: with a shared or relabund file and a design file, or with a shared or relabund file and a tree file, or with a shared or relabund file, tree file and design file. \n"; + helpString += "The indicator command outputs a .indicator.summary file and a .indicator.tre if a tree is given. \n"; helpString += "The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"; helpString += "The summary file lists the indicator value for each OTU for each node.\n"; - helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. The tree parameter is required as well as either shared or relabund.\n"; - helpString += "The design parameter allows you to provide a design file to relate the tree to the shared or relabund file.\n"; + helpString += "The indicator command parameters are tree, groups, shared, relabund, design and label. \n"; + helpString += "The design parameter allows you to relate the tree to the shared or relabund file, if your tree contains the grouping names, or if no tree is provided to group your groups into groupings.\n"; helpString += "The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed, or if you provide a design file the groups in your design file. The groups may be entered separated by dashes.\n"; helpString += "The label parameter indicates at what distance your tree relates to the shared or relabund.\n"; + helpString += "The processors parameter allows you to specify how many processors you would like to use. The default is 1. \n"; helpString += "The iters parameter allows you to set number of randomization for the P value. The default is 1000."; helpString += "The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"; helpString += "Note: No spaces between parameter labels (i.e. tree), '=' and parameters (i.e.yourTreefile).\n"; @@ -54,7 +57,22 @@ string IndicatorCommand::getHelpString(){ exit(1); } } - +//********************************************************************************************************************** +string IndicatorCommand::getOutputPattern(string type) { + try { + string pattern = ""; + + if (type == "tree") { pattern = "[filename],indicator.tre"; } + else if (type == "summary") { pattern = "[filename],indicator.summary"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } + + return pattern; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "getOutputPattern"); + exit(1); + } +} //********************************************************************************************************************** IndicatorCommand::IndicatorCommand(){ try { @@ -93,10 +111,9 @@ IndicatorCommand::IndicatorCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); - m->names.clear(); vector tempOutNames; outputTypes["tree"] = tempOutNames; @@ -166,13 +183,17 @@ IndicatorCommand::IndicatorCommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; Groups.push_back("all"); } else { m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } string temp = validParameter.validFile(parameters, "iters", false); if (temp == "not found") { temp = "1000"; } - convert(temp, iters); + m->mothurConvert(temp, iters); + + temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + m->mothurConvert(temp, processors); if ((relabundfile == "") && (sharedfile == "")) { //is there are current file available for either of these? @@ -204,6 +225,7 @@ IndicatorCommand::IndicatorCommand(string option) { if ((relabundfile != "") && (sharedfile != "")) { m->mothurOut("[ERROR]: You may not use both a shared and relabund file."); m->mothurOutEndLine(); abort = true; } + } } catch(exception& e) { @@ -219,19 +241,22 @@ int IndicatorCommand::execute(){ if (abort == true) { if (calledHelp) { return 0; } return 2; } cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); + + int start = time(NULL); - //read designfile if given and set up globaldatas groups for read of sharedfiles + //read designfile if given and set up groups for read of sharedfiles if (designfile != "") { designMap = new GroupMap(designfile); designMap->readDesignMap(); //fill Groups - checks for "all" and for any typo groups - SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, designMap->namesOfGroups); - delete util; + SharedUtil util; + vector nameGroups = designMap->getNamesOfGroups(); + util.setGroups(Groups, nameGroups); + designMap->setNamesOfGroups(nameGroups); - //loop through the Groups and fill Globaldata's Groups with the design file info - m->Groups = designMap->getNamesSeqs(Groups); + vector namesSeqs = designMap->getNamesSeqs(Groups); + m->setGroups(namesSeqs); } /***************************************************/ @@ -247,8 +272,8 @@ int IndicatorCommand::execute(){ if (lookupFloat[0] == NULL) { m->mothurOut("[ERROR] reading relabund file."); m->mothurOutEndLine(); return 0; } } - //reset Globaldatas groups if needed - if (designfile != "") { m->Groups = Groups; } + //reset groups if needed + if (designfile != "") { m->setGroups(Groups); } /***************************************************/ // reading tree info // @@ -257,45 +282,51 @@ int IndicatorCommand::execute(){ string groupfile = ""; m->setTreeFile(treefile); Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - treeMap = new TreeMap(); + ct = new CountTable(); bool mismatch = false; - - for (int i = 0; i < m->Treenames.size(); i++) { - //sanity check - is this a group that is not in the sharedfile? + + set nameMap; + map groupMap; + set gps; + for (int i = 0; i < m->Treenames.size(); i++) { + nameMap.insert(m->Treenames[i]); + //sanity check - is this a group that is not in the sharedfile? + if (i == 0) { gps.insert("Group1"); } if (designfile == "") { - if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) { + if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) { m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); mismatch = true; } - treeMap->addSeq(m->Treenames[i], "Group1"); + groupMap[m->Treenames[i]] = "Group1"; }else{ vector myGroups; myGroups.push_back(m->Treenames[i]); vector myNames = designMap->getNamesSeqs(myGroups); for(int k = 0; k < myNames.size(); k++) { - if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) { + if (!(m->inUsersGroups(myNames[k], m->getAllGroups()))) { m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); mismatch = true; } } - treeMap->addSeq(m->Treenames[i], "Group1"); + groupMap[m->Treenames[i]] = "Group1"; } - } + } + ct->createTable(nameMap, groupMap, gps); - if ((designfile != "") && (m->Treenames.size() != Groups.size())) { m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } + if ((designfile != "") && (m->Treenames.size() != Groups.size())) { cout << Groups.size() << '\t' << m->Treenames.size() << endl; m->mothurOut("[ERROR]: You design file does not match your tree, aborting."); m->mothurOutEndLine(); mismatch = true; } if (mismatch) { //cleanup and exit if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete treeMap; + delete ct; return 0; } read = new ReadNewickTree(treefile); - int readOk = read->read(treeMap); + int readOk = read->read(ct); - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete treeMap; delete read; return 0; } + if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete ct; delete read; return 0; } vector T = read->getTrees(); @@ -305,35 +336,34 @@ int IndicatorCommand::execute(){ if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - for (int i = 0; i < T.size(); i++) { delete T[i]; } delete treeMap; return 0; + for (int i = 0; i < T.size(); i++) { delete T[i]; } delete ct; return 0; } - + T[0]->assembleTree(); /***************************************************/ // create ouptut tree - respecting pickedGroups // /***************************************************/ - Tree* outputTree = new Tree(m->Groups.size(), treeMap); + Tree* outputTree = new Tree(m->getNumGroups(), ct); - outputTree->getSubTree(T[0], m->Groups); + outputTree->getSubTree(T[0], m->getGroups()); outputTree->assembleTree(); //no longer need original tree, we have output tree to use and label for (int i = 0; i < T.size(); i++) { delete T[i]; } - if (m->control_pressed) { if (designfile != "") { delete designMap; } if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - delete outputTree; delete treeMap; return 0; + delete outputTree; delete ct; return 0; } /***************************************************/ // get indicator species values // /***************************************************/ GetIndicatorSpecies(outputTree); - delete outputTree; delete treeMap; + delete outputTree; delete ct; }else { //run with design file only //get indicator species @@ -344,7 +374,7 @@ int IndicatorCommand::execute(){ if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //set tree file as new current treefile if (treefile != "") { @@ -354,6 +384,8 @@ int IndicatorCommand::execute(){ if ((itTypes->second).size() != 0) { current = (itTypes->second)[0]; m->setTreeFile(current); } } } + + m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("It took " + toString(time(NULL) - start) + " secs to find the indicator species."); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -373,12 +405,15 @@ int IndicatorCommand::GetIndicatorSpecies(){ try { string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)); + string outputFileName = getOutputFileName("summary", variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; m->openOutputFile(outputFileName, out); out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + m->mothurOutEndLine(); m->mothurOut("Species\tIndicator_Groups\tIndicatorValue\tpValue\n"); int numBins = 0; if (sharedfile != "") { numBins = lookup[0]->getNumBins(); } @@ -392,6 +427,7 @@ int IndicatorCommand::GetIndicatorSpecies(){ vector indicatorValues; //size of numBins vector pValues; + vector indicatorGroups; map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { @@ -400,11 +436,11 @@ int IndicatorCommand::GetIndicatorSpecies(){ vector subset; //for each grouping - for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) { for (int k = 0; k < lookup.size(); k++) { //are you from this grouping? - if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) { + if (designMap->getGroup(lookup[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) { subset.push_back(lookup[k]); groupsAlreadyAdded.insert(lookup[k]->getGroup()); } @@ -415,31 +451,19 @@ int IndicatorCommand::GetIndicatorSpecies(){ if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings, randomGroupingsMap); - - pValues.resize(indicatorValues.size(), 0); - for(int i=0;icontrol_pressed) { break; } - randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); - vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); - - for (int j = 0; j < indicatorValues.size(); j++) { - if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } - } - } + indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap); - for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } - + pValues = getPValues(groupings, lookup.size(), indicatorValues); }else { vector< vector > groupings; set groupsAlreadyAdded; vector subset; //for each grouping - for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) { for (int k = 0; k < lookupFloat.size(); k++) { //are you from this grouping? - if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) { + if (designMap->getGroup(lookupFloat[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) { subset.push_back(lookupFloat[k]); groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); } @@ -450,20 +474,9 @@ int IndicatorCommand::GetIndicatorSpecies(){ if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings, randomGroupingsMap); + indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap); - pValues.resize(indicatorValues.size(), 0); - for(int i=0;icontrol_pressed) { break; } - randomGroupingsMap = randomizeGroupings(groupings, lookupFloat.size()); - vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); - - for (int j = 0; j < indicatorValues.size(); j++) { - if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } - } - } - - for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + pValues = getPValues(groupings, lookupFloat.size(), indicatorValues); } if (m->control_pressed) { out.close(); return 0; } @@ -472,32 +485,26 @@ int IndicatorCommand::GetIndicatorSpecies(){ /******************************************************/ //output indicator values to table form // /*****************************************************/ - int indicatorOTU; - double maxValue, pvalue; maxValue=0.0; pvalue=0.0; - out << "OTU\tIndicator_Value\tpValue" << endl; + out << "OTU\tIndicator_Groups\tIndicator_Value\tpValue" << endl; for (int j = 0; j < indicatorValues.size(); j++) { if (m->control_pressed) { out.close(); return 0; } - out << (j+1) << '\t' << indicatorValues[j] << '\t'; + out << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; if (pValues[j] > (1/(float)iters)) { out << pValues[j] << endl; } else { out << "<" << (1/(float)iters) << endl; } - if (maxValue < indicatorValues[j]) { - maxValue = indicatorValues[j]; - indicatorOTU = j; + if (pValues[j] <= 0.05) { + cout << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog(m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); + m->mothurOutEndLine(); } } - m->mothurOutEndLine(); m->mothurOut("Species\tIndicatorValue\tpValue\n"); - cout << "OTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; - string pValueString = "<" + toString((1/(float)iters)); - if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} - else { cout << "<" << (1/(float)iters); } - m->mothurOutJustToLog("OTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); - m->mothurOutEndLine(); - out.close(); return 0; @@ -516,7 +523,9 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } - string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; + map variables; + variables["[filename]"] = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)); + string outputFileName = getOutputFileName("summary",variables); outputNames.push_back(outputFileName); outputTypes["summary"].push_back(outputFileName); ofstream out; @@ -529,14 +538,15 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //print headings out << "TreeNode\t"; - for (int i = 0; i < numBins; i++) { out << "OTU" << (i+1) << "_IndValue" << '\t' << "pValue" << '\t'; } + for (int i = 0; i < numBins; i++) { out << m->currentBinLabels[i] << "_IndGroups" << '\t' << m->currentBinLabels[i] << "_IndValue" << '\t' << "pValue" << '\t'; } out << endl; - m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicatorValue\tpValue\n"); + m->mothurOutEndLine(); m->mothurOut("Node\tSpecies\tIndicator_Groups\tIndicatorValue\tpValue\n"); string treeOutputDir = outputDir; if (outputDir == "") { treeOutputDir += m->hasPath(treefile); } - string outputTreeFileName = treeOutputDir + m->getRootName(m->getSimpleName(treefile)) + "indicator.tre"; + variables["[filename]"] = treeOutputDir + m->getRootName(m->getSimpleName(treefile)); + string outputTreeFileName = getOutputFileName("tree", variables); //create a map from tree node index to names of descendants, save time later to know which sharedRabund you need @@ -554,7 +564,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //for each node for (int i = T->getNumLeaves(); i < T->getNumNodes(); i++) { - + //cout << endl << i+1 << endl; if (m->control_pressed) { out.close(); return 0; } /*****************************************************/ @@ -563,6 +573,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ vector indicatorValues; //size of numBins vector pValues; + vector indicatorGroups; map< vector, vector > randomGroupingsMap; //maps location in groupings to location in groupings, ie, [0][0] -> [1][2]. This is so we don't have to actually move the sharedRabundVectors. if (sharedfile != "") { @@ -614,21 +625,9 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings, randomGroupingsMap); - - pValues.resize(indicatorValues.size(), 0); - for(int i=0;icontrol_pressed) { break; } - randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); - vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); - - for (int j = 0; j < indicatorValues.size(); j++) { - if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } - } - } - - for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap); + pValues = getPValues(groupings, lookup.size(), indicatorValues); }else { vector< vector > groupings; @@ -675,21 +674,9 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ if (groupsAlreadyAdded.size() != lookupFloat.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } - indicatorValues = getValues(groupings, randomGroupingsMap); + indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap); - pValues.resize(indicatorValues.size(), 0); - for(int i=0;icontrol_pressed) { break; } - randomGroupingsMap = randomizeGroupings(groupings, lookup.size()); - vector randomIndicatorValues = getValues(groupings, randomGroupingsMap); - - for (int j = 0; j < indicatorValues.size(); j++) { - if (randomIndicatorValues[j] >= indicatorValues[j]) { pValues[j]++; } - } - } - - for (int i = 0; i < pValues.size(); i++) { pValues[i] /= (double)iters; } + pValues = getPValues(groupings, lookupFloat.size(), indicatorValues); } if (m->control_pressed) { out.close(); return 0; } @@ -699,35 +686,28 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //output indicator values to table form + label tree // /*****************************************************/ out << (i+1) << '\t'; - int indicatorOTU; - double maxValue, pvalue; maxValue=0.0; pvalue=0.0; for (int j = 0; j < indicatorValues.size(); j++) { if (m->control_pressed) { out.close(); return 0; } if (pValues[j] < (1/(float)iters)) { - out << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t'; + out << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << '<' << (1/(float)iters) << '\t'; }else { - out << indicatorValues[j] << '\t' << pValues[j] << '\t'; + out << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t' << pValues[j] << '\t'; } - if (maxValue < indicatorValues[j]) { - maxValue = indicatorValues[j]; - indicatorOTU = j; + if (pValues[j] <= 0.05) { + cout << i+1 << '\t' << m->currentBinLabels[j] << '\t' << indicatorGroups[j] << '\t' << indicatorValues[j] << '\t'; + string pValueString = "<" + toString((1/(float)iters)); + if (pValues[j] > (1/(float)iters)) { pValueString = toString(pValues[j]); cout << pValues[j];} + else { cout << "<" << (1/(float)iters); } + m->mothurOutJustToLog(toString(i) + "\t" + m->currentBinLabels[j] + "\t" + indicatorGroups[j] + "\t" + toString(indicatorValues[j]) + "\t" + pValueString); + m->mothurOutEndLine(); } } out << endl; T->tree[i].setLabel((i+1)); - - cout << i+1 << "\tOTU" << indicatorOTU+1 << '\t' << maxValue << '\t'; - string pValueString = "<" + toString((1/(float)iters)); - if (pValues[indicatorOTU] > (1/(float)iters)) { pValueString = toString(pValues[indicatorOTU]); cout << pValues[indicatorOTU];} - else { cout << "<" << (1/(float)iters); } - m->mothurOutJustToLog(toString(i) + "\tOTU" + toString(indicatorOTU+1) + "\t" + toString(maxValue) + "\t" + pValueString); - m->mothurOutEndLine(); - - } out.close(); @@ -746,11 +726,25 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } //********************************************************************************************************************** -vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ +vector IndicatorCommand::getValues(vector< vector >& groupings, vector& indicatorGroupings, map< vector, vector > groupingsMap){ try { vector values; map< vector, vector >::iterator it; - + + indicatorGroupings.clear(); + + //create grouping strings + vector groupingsGroups; + for (int j = 0; j < groupings.size(); j++) { + string tempGrouping = ""; + for (int k = 0; k < groupings[j].size()-1; k++) { + tempGrouping += groupings[j][k]->getGroup() + "-"; + } + tempGrouping += groupings[j][groupings[j].size()-1]->getGroup(); + groupingsGroups.push_back(tempGrouping); + } + + //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { @@ -790,15 +784,17 @@ vector IndicatorCommand::getValues(vector< vector maxIndVal) { maxIndVal = thisValue; } + if (thisValue > maxIndVal) { maxIndVal = thisValue; maxGrouping = groupingsGroups[j]; } } values.push_back(maxIndVal); + indicatorGroupings.push_back(maxGrouping); } return values; @@ -810,17 +806,24 @@ vector IndicatorCommand::getValues(vector< vector IndicatorCommand::getValues(vector< vector >& groupings, map< vector, vector > groupingsMap){ +vector IndicatorCommand::getValues(vector< vector >& groupings, vector& indicatorGroupings, map< vector, vector > groupingsMap){ try { vector values; - - /*for (int j = 0; j < groupings.size(); j++) { - cout << "grouping " << j << endl; - for (int k = 0; k < groupings[j].size(); k++) { - cout << groupings[j][k]->getGroup() << endl; - } - }*/ map< vector, vector >::iterator it; + + indicatorGroupings.clear(); + + //create grouping strings + vector groupingsGroups; + for (int j = 0; j < groupings.size(); j++) { + string tempGrouping = ""; + for (int k = 0; k < groupings[j].size()-1; k++) { + tempGrouping += groupings[j][k]->getGroup() + "-"; + } + tempGrouping += groupings[j][groupings[j].size()-1]->getGroup(); + groupingsGroups.push_back(tempGrouping); + } + //for each otu for (int i = 0; i < groupings[0][0]->getNumBins(); i++) { @@ -857,15 +860,17 @@ vector IndicatorCommand::getValues(vector< vector >& } float maxIndVal = 0.0; + string maxGrouping = ""; for (int j = 0; j < terms.size(); j++) { float thisAij = (terms[j] / AijDenominator); //relative abundance float thisValue = thisAij * Bij[j] * 100.0; //save largest - if (thisValue > maxIndVal) { maxIndVal = thisValue; } + if (thisValue > maxIndVal) { maxIndVal = thisValue; maxGrouping = groupingsGroups[j]; } } values.push_back(maxIndVal); + indicatorGroupings.push_back(maxGrouping); } return values; @@ -1135,7 +1140,360 @@ int IndicatorCommand::getSharedFloat(){ m->errorOut(e, "IndicatorCommand", "getShared"); exit(1); } -} +} +//********************************************************************************************************************** +vector IndicatorCommand::driver(vector< vector >& groupings, int num, vector indicatorValues, int numIters){ + try { + vector pvalues; + pvalues.resize(indicatorValues.size(), 0); + vector notUsedGroupings; //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something to make it work. + + for(int i=0;icontrol_pressed) { break; } + map< vector, vector > groupingsMap = randomizeGroupings(groupings, num); + vector randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pvalues[j]++; } + } + } + + return pvalues; + + }catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "driver"); + exit(1); + } +} +//********************************************************************************************************************** +vector IndicatorCommand::getPValues(vector< vector >& groupings, int num, vector indicatorValues){ + try { + vector pvalues; + + if(processors == 1){ + pvalues = driver(groupings, num, indicatorValues, iters); + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } + }else{ + //divide iters between processors + vector procIters; + int numItersPerProcessor = iters / processors; + + //divide iters between processes + for (int h = 0; h < processors; h++) { + if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } + procIters.push_back(numItersPerProcessor); + } + + vector processIDS; + int process = 1; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + pvalues = driver(groupings, num, indicatorValues, procIters[process]); + + //pass pvalues to parent + ofstream out; + string tempFile = toString(getpid()) + ".pvalues.temp"; + m->openOutputFile(tempFile, out); + + //pass values + for (int i = 0; i < pvalues.size(); i++) { + out << pvalues[i] << '\t'; + } + out << endl; + + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //do my part + pvalues = driver(groupings, num, indicatorValues, procIters[0]); + + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFile, in); + + ////// to do /////////// + int numTemp; numTemp = 0; + for (int j = 0; j < pvalues.size(); j++) { + in >> numTemp; m->gobble(in); + pvalues[j] += numTemp; + } + in.close(); m->mothurRemove(tempFile); + } + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } +#else + + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i > newGroupings; + + for (int k = 0; k < groupings.size(); k++) { + vector newLookup; + for (int l = 0; l < groupings[k].size(); l++) { + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel(groupings[k][l]->getLabel()); + temp->setGroup(groupings[k][l]->getGroup()); + newLookup.push_back(temp); + } + newGroupings.push_back(newLookup); + } + + //for each bin + for (int l = 0; l < groupings.size(); l++) { + for (int k = 0; k < groupings[l][0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u]; } } return pvalues; } + + for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back(groupings[l][j]->getAbundance(k), groupings[l][j]->getGroup()); } + } + } + + vector copyIValues = indicatorValues; + + indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues); + pDataArray.push_back(temp); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //do my part + pvalues = driver(groupings, num, indicatorValues, procIters[0]); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j]; } + + for (int l = 0; l < pDataArray[i]->groupings.size(); l++) { + for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; } + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } +#endif + } + + + return pvalues; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "getPValues"); + exit(1); + } +} + +//********************************************************************************************************************** +//same as above, just data type difference +vector IndicatorCommand::driver(vector< vector >& groupings, int num, vector indicatorValues, int numIters){ + try { + vector pvalues; + pvalues.resize(indicatorValues.size(), 0); + vector notUsedGroupings; //we dont care about the grouping for the pvalues since they are randomized, but we need to pass the function something to make it work. + + for(int i=0;icontrol_pressed) { break; } + map< vector, vector > groupingsMap = randomizeGroupings(groupings, num); + vector randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap); + + for (int j = 0; j < indicatorValues.size(); j++) { + if (randomIndicatorValues[j] >= indicatorValues[j]) { pvalues[j]++; } + } + } + + return pvalues; + + }catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "driver"); + exit(1); + } +} +//********************************************************************************************************************** +//same as above, just data type difference +vector IndicatorCommand::getPValues(vector< vector >& groupings, int num, vector indicatorValues){ + try { + vector pvalues; + + if(processors == 1){ + pvalues = driver(groupings, num, indicatorValues, iters); + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } + }else{ + //divide iters between processors + vector procIters; + int numItersPerProcessor = iters / processors; + + //divide iters between processes + for (int h = 0; h < processors; h++) { + if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; } + procIters.push_back(numItersPerProcessor); + } + + vector processIDS; + int process = 1; + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix) + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + pvalues = driver(groupings, num, indicatorValues, procIters[process]); + + //pass pvalues to parent + ofstream out; + string tempFile = toString(getpid()) + ".pvalues.temp"; + m->openOutputFile(tempFile, out); + + //pass values + for (int i = 0; i < pvalues.size(); i++) { + out << pvalues[i] << '\t'; + } + out << endl; + + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //do my part + pvalues = driver(groupings, num, indicatorValues, procIters[0]); + + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFile, in); + + ////// to do /////////// + int numTemp; numTemp = 0; + for (int j = 0; j < pvalues.size(); j++) { + in >> numTemp; m->gobble(in); + pvalues[j] += numTemp; + } + in.close(); m->mothurRemove(tempFile); + } + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } +#else + + //fill in functions + vector pDataArray; + DWORD dwThreadIdArray[processors-1]; + HANDLE hThreadArray[processors-1]; + + //Create processor worker threads. + for( int i=1; i > newGroupings; + + for (int k = 0; k < groupings.size(); k++) { + vector newLookup; + for (int l = 0; l < groupings[k].size(); l++) { + SharedRAbundFloatVector* temp = new SharedRAbundFloatVector(); + temp->setLabel(groupings[k][l]->getLabel()); + temp->setGroup(groupings[k][l]->getGroup()); + newLookup.push_back(temp); + } + newGroupings.push_back(newLookup); + } + + //for each bin + for (int l = 0; l < groupings.size(); l++) { + for (int k = 0; k < groupings[l][0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u]; } } return pvalues; } + + for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back((float)(groupings[l][j]->getAbundance(k)), groupings[l][j]->getGroup()); } + } + } + + vector copyIValues = indicatorValues; + + indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues); + pDataArray.push_back(temp); + processIDS.push_back(i); + + hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]); + } + + //do my part + pvalues = driver(groupings, num, indicatorValues, procIters[0]); + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j]; } + + for (int l = 0; l < pDataArray[i]->groupings.size(); l++) { + for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; } + } + + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } + + for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; } +#endif + } + + + return pvalues; + } + catch(exception& e) { + m->errorOut(e, "IndicatorCommand", "getPValues"); + exit(1); + } +} //********************************************************************************************************************** //swap groups between groupings, in essence randomizing the second column of the design file map< vector, vector > IndicatorCommand::randomizeGroupings(vector< vector >& groupings, int numLookupGroups){