X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=unifracweightedcommand.cpp;h=13d10fd5f4d6c690777fcc4730f06fea8832341f;hb=8e67e9de1b200106bea5a468ac02125954656499;hp=e596db230ee24d80597cfa903a49ed9ee6a7f25b;hpb=03dca3b32a903c3f29fbcf5b410b19d6ab6dae63;p=mothur.git diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index e596db2..13d10fd 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -10,6 +10,7 @@ #include "unifracweightedcommand.h" #include "consensus.h" #include "subsample.h" +#include "treereader.h" //********************************************************************************************************************** vector UnifracWeightedCommand::setParameters(){ @@ -63,6 +64,30 @@ string UnifracWeightedCommand::getHelpString(){ } } //********************************************************************************************************************** +string UnifracWeightedCommand::getOutputFileNameTag(string type, string inputName=""){ + try { + string outputFileName = ""; + map >::iterator it; + + //is this a type this command creates + it = outputTypes.find(type); + if (it == outputTypes.end()) { m->mothurOut("[ERROR]: this command doesn't create a " + type + " output file.\n"); } + else { + if (type == "weighted") { outputFileName = "weighted"; } + else if (type == "wsummary") { outputFileName = "wsummary"; } + else if (type == "phylip") { outputFileName = "dist"; } + else if (type == "column") { outputFileName = "dist"; } + else if (type == "tree") { outputFileName = "tre"; } + else { m->mothurOut("[ERROR]: No definition for type " + type + " output file tag.\n"); m->control_pressed = true; } + } + return outputFileName; + } + catch(exception& e) { + m->errorOut(e, "UnifracWeightedCommand", "getOutputFileNameTag"); + exit(1); + } +} +//********************************************************************************************************************** UnifracWeightedCommand::UnifracWeightedCommand(){ try { abort = true; calledHelp = true; @@ -141,12 +166,6 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { } } - m->runParse = true; - m->clearGroups(); - m->clearAllGroups(); - m->Treenames.clear(); - m->names.clear(); - //check for required parameters treefile = validParameter.validFile(parameters, "tree", true); if (treefile == "not open") { treefile = ""; abort = true; } @@ -238,9 +257,16 @@ int UnifracWeightedCommand::execute() { m->setTreeFile(treefile); - readTrees(); if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } + TreeReader* reader = new TreeReader(treefile, groupfile, namefile); + T = reader->getTrees(); + tmap = T[0]->getTreeMap(); + map nameMap = reader->getNames(); + map unique2Dup = reader->getNameMap(); + delete reader; + + if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } - sumFile = outputDir + m->getSimpleName(treefile) + ".wsummary"; + sumFile = outputDir + m->getSimpleName(treefile) + getOutputFileNameTag("wsummary"); m->openOutputFile(sumFile, outSum); outputNames.push_back(sumFile); outputTypes["wsummary"].push_back(sumFile); @@ -253,7 +279,7 @@ int UnifracWeightedCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } return 0; } - Weighted weighted(tmap, includeRoot); + Weighted weighted(includeRoot); int start = time(NULL); @@ -304,9 +330,9 @@ int UnifracWeightedCommand::execute() { vector randomData; randomData.resize(numComp,0); //weighted score info for random trees. data[0] = weightedscore AB, data[1] = weightedscore AC... if (random) { - output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted", itersString); - outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted"); - outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted"); + output = new ColumnFile(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted"), itersString); + outputNames.push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted")); + outputTypes["weighted"].push_back(outputDir + m->getSimpleName(treefile) + toString(i+1) + "." + getOutputFileNameTag("weighted")); } userData = weighted.getValues(T[i], processors, outputDir); //userData[0] = weightedscore @@ -333,16 +359,20 @@ int UnifracWeightedCommand::execute() { if (m->control_pressed) { break; } - //copy to preserve old one - would do this in subsample but tree needs it and memory cleanup becomes messy. + //copy to preserve old one - would do this in subsample but memory cleanup becomes messy. TreeMap* newTmap = new TreeMap(); - newTmap->getCopy(tmap); + //newTmap->getCopy(*tmap); + //SubSample sample; + //Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize); + + //uses method of setting groups to doNotIncludeMe SubSample sample; - Tree* subSampleTree = sample.getSample(T[i], newTmap, nameMap, subsampleSize); - + Tree* subSampleTree = sample.getSample(T[i], tmap, newTmap, subsampleSize, unique2Dup); + //call new weighted function vector iterData; iterData.resize(numComp,0); - Weighted thisWeighted(newTmap, includeRoot); + Weighted thisWeighted(includeRoot); iterData = thisWeighted.getValues(subSampleTree, processors, outputDir); //userData[0] = weightedscore //save data to make ave dist, std dist @@ -350,6 +380,8 @@ int UnifracWeightedCommand::execute() { delete newTmap; delete subSampleTree; + + if((thisIter+1) % 100 == 0){ m->mothurOut(toString(thisIter+1)); m->mothurOutEndLine(); } } if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (random) { delete output; } outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -453,15 +485,15 @@ int UnifracWeightedCommand::getAverageSTDMatrices(vector< vector >& dist } } - string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave.dist"; - outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); - + string aveFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.ave." + getOutputFileNameTag("phylip"); + if (outputForm != "column") { outputNames.push_back(aveFileName); outputTypes["phylip"].push_back(aveFileName); } + else { outputNames.push_back(aveFileName); outputTypes["column"].push_back(aveFileName); } ofstream out; m->openOutputFile(aveFileName, out); - string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std.dist"; - outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); - + string stdFileName = outputDir + m->getSimpleName(treefile) + toString(treeNum+1) + ".weighted.std." + getOutputFileNameTag("phylip"); + if (outputForm != "column") { outputNames.push_back(stdFileName); outputTypes["phylip"].push_back(stdFileName); } + else { outputNames.push_back(stdFileName); outputTypes["column"].push_back(stdFileName); } ofstream outStd; m->openOutputFile(stdFileName, outStd); @@ -525,8 +557,8 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i m->runParse = false; //create treemap class from groupmap for tree class to use - TreeMap* newTmap = new TreeMap(); - newTmap->makeSim(m->getGroups()); + TreeMap newTmap; + newTmap.makeSim(m->getGroups()); //clear old tree names if any m->Treenames.clear(); @@ -536,20 +568,19 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i vector newTrees = buildTrees(dists, treeNum, newTmap); //also creates .all.tre file containing the trees created - if (m->control_pressed) { delete newTmap; return 0; } + if (m->control_pressed) { return 0; } Consensus con; - Tree* conTree = con.getTree(newTrees, newTmap); + Tree* conTree = con.getTree(newTrees); //create a new filename - string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons.tre"; + string conFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.cons." + getOutputFileNameTag("tree"); outputNames.push_back(conFile); outputTypes["tree"].push_back(conFile); ofstream outTree; m->openOutputFile(conFile, outTree); if (conTree != NULL) { conTree->print(outTree, "boot"); delete conTree; } outTree.close(); - delete newTmap; return 0; @@ -561,13 +592,13 @@ int UnifracWeightedCommand::getConsensusTrees(vector< vector >& dists, i } /**************************************************************************************************/ -vector UnifracWeightedCommand::buildTrees(vector< vector >& dists, int treeNum, TreeMap* mytmap) { +vector UnifracWeightedCommand::buildTrees(vector< vector >& dists, int treeNum, TreeMap& mytmap) { try { vector trees; //create a new filename - string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all.tre"; + string outputFile = outputDir + m->getRootName(m->getSimpleName(treefile)) + toString(treeNum+1) + ".weighted.all." + getOutputFileNameTag("tree"); outputNames.push_back(outputFile); outputTypes["tree"].push_back(outputFile); ofstream outAll; @@ -595,8 +626,9 @@ vector UnifracWeightedCommand::buildTrees(vector< vector >& dists } //create tree - Tree* tempTree = new Tree(mytmap, sims); - tempTree->assembleTree(); + Tree* tempTree = new Tree(&mytmap, sims); + map empty; + tempTree->assembleTree(empty); trees.push_back(tempTree); @@ -617,80 +649,6 @@ vector UnifracWeightedCommand::buildTrees(vector< vector >& dists } /**************************************************************************************************/ -int UnifracWeightedCommand::readTrees() { - try { - - if (groupfile != "") { - //read in group map info. - tmap = new TreeMap(groupfile); - tmap->readMap(); - }else{ //fake out by putting everyone in one group - Tree* tree = new Tree(treefile); delete tree; //extracts names from tree to make faked out groupmap - tmap = new TreeMap(); - - for (int i = 0; i < m->Treenames.size(); i++) { tmap->addSeq(m->Treenames[i], "Group1"); } - } - - if (namefile != "") { readNamesFile(); } - - read = new ReadNewickTree(treefile); - int readOk = read->read(tmap); - - if (readOk != 0) { m->mothurOut("Read Terminated."); m->mothurOutEndLine(); delete tmap; delete read; return 0; } - - read->AssembleTrees(); - T = read->getTrees(); - delete read; - - //make sure all files match - //if you provide a namefile we will use the numNames in the namefile as long as the number of unique match the tree names size. - int numNamesInTree; - if (namefile != "") { - if (numUniquesInName == m->Treenames.size()) { numNamesInTree = nameMap.size(); } - else { numNamesInTree = m->Treenames.size(); } - }else { numNamesInTree = m->Treenames.size(); } - - - //output any names that are in group file but not in tree - if (numNamesInTree < tmap->getNumSeqs()) { - for (int i = 0; i < tmap->namesOfSeqs.size(); i++) { - //is that name in the tree? - int count = 0; - for (int j = 0; j < m->Treenames.size(); j++) { - if (tmap->namesOfSeqs[i] == m->Treenames[j]) { break; } //found it - count++; - } - - if (m->control_pressed) { - delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->clearGroups(); - return 0; - } - - //then you did not find it so report it - if (count == m->Treenames.size()) { - //if it is in your namefile then don't remove - map::iterator it = nameMap.find(tmap->namesOfSeqs[i]); - - if (it == nameMap.end()) { - m->mothurOut(tmap->namesOfSeqs[i] + " is in your groupfile and not in your tree. It will be disregarded."); m->mothurOutEndLine(); - tmap->removeSeq(tmap->namesOfSeqs[i]); - i--; //need this because removeSeq removes name from namesOfSeqs - } - } - } - } - - return 0; - } - catch(exception& e) { - m->errorOut(e, "UnifracWeightedCommand", "readTrees"); - exit(1); - } -} -/**************************************************************************************************/ - int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersScores) { try { @@ -731,7 +689,7 @@ int UnifracWeightedCommand::runRandomCalcs(Tree* thisTree, vector usersS createProcesses(thisTree, namesOfGroupCombos, rScores); } #else - driver(T[i], namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); + driver(thisTree, namesOfGroupCombos, 0, namesOfGroupCombos.size(), rScores); #endif if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } delete output; outSum.close(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -839,7 +797,7 @@ int UnifracWeightedCommand::driver(Tree* t, vector< vector > namesOfGrou try { Tree* randT = new Tree(tmap); - Weighted weighted(tmap, includeRoot); + Weighted weighted(includeRoot); for (int h = start; h < (start+num); h++) { @@ -951,10 +909,10 @@ void UnifracWeightedCommand::createPhylipFile() { string phylipFileName; if ((outputForm == "lt") || (outputForm == "square")) { - phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip.dist"; + phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.phylip." + getOutputFileNameTag("phylip"); outputNames.push_back(phylipFileName); outputTypes["phylip"].push_back(phylipFileName); }else { //column - phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column.dist"; + phylipFileName = outputDir + m->getSimpleName(treefile) + toString(i+1) + ".weighted.column." + getOutputFileNameTag("column"); outputNames.push_back(phylipFileName); outputTypes["column"].push_back(phylipFileName); } @@ -1079,46 +1037,6 @@ void UnifracWeightedCommand::calculateFreqsCumuls() { exit(1); } } -/*****************************************************************/ -int UnifracWeightedCommand::readNamesFile() { - try { - m->names.clear(); - numUniquesInName = 0; - - ifstream in; - m->openInputFile(namefile, in); - - string first, second; - map::iterator itNames; - - while(!in.eof()) { - in >> first >> second; m->gobble(in); - - numUniquesInName++; - - itNames = m->names.find(first); - if (itNames == m->names.end()) { - m->names[first] = second; - - //we need a list of names in your namefile to use above when removing extra seqs above so we don't remove them - vector dupNames; - m->splitAtComma(second, dupNames); - - for (int i = 0; i < dupNames.size(); i++) { - nameMap[dupNames[i]] = first; - if ((groupfile == "") && (i != 0)) { tmap->addSeq(dupNames[i], "Group1"); } - } - }else { m->mothurOut(first + " has already been seen in namefile, disregarding names file."); m->mothurOutEndLine(); in.close(); m->names.clear(); namefile = ""; return 1; } - } - in.close(); - - return 0; - } - catch(exception& e) { - m->errorOut(e, "UnifracWeightedCommand", "readNamesFile"); - exit(1); - } -} /***********************************************************/