From: westcott Date: Fri, 3 Dec 2010 19:53:27 +0000 (+0000) Subject: fixed indicator command bug X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=1916dd65829d6bb5b8bef74eddc61ea38fccf63a fixed indicator command bug --- diff --git a/catchallcommand.cpp b/catchallcommand.cpp index 8f28413..efe99b0 100644 --- a/catchallcommand.cpp +++ b/catchallcommand.cpp @@ -28,6 +28,7 @@ CatchAllCommand::CatchAllCommand(){ //initialize outputTypes vector tempOutNames; outputTypes["csv"] = tempOutNames; + outputTypes["summary"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "CatchAllCommand", "CatchAllCommand"); @@ -86,6 +87,7 @@ CatchAllCommand::CatchAllCommand(string option) { //initialize outputTypes vector tempOutNames; outputTypes["csv"] = tempOutNames; + outputTypes["summary"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -203,6 +205,7 @@ int CatchAllCommand::execute() { outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv"); outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv"); + createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel()); if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete read; delete input; globaldata->ginput = NULL; delete sabund; return 0; } @@ -238,6 +241,8 @@ int CatchAllCommand::execute() { outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv"); outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv"); + createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel()); + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete read; delete input; globaldata->ginput = NULL; delete sabund; return 0; } processedLabels.insert(sabund->getLabel()); @@ -291,7 +296,9 @@ int CatchAllCommand::execute() { outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv"); outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv"); outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv"); - outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv"); + outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv"); + + createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel()); delete sabund; } @@ -340,6 +347,55 @@ string CatchAllCommand::process(SAbundVector* sabund) { exit(1); } } +//********************************************************************************************************************** +string CatchAllCommand::createSummaryFile(string file1, string label) { + try { + string filename = outputDir + m->getRootName(m->getSimpleName(sabundfile)) + label + ".catchall.summary"; + filename = m->getFullPathName(filename); + outputNames.push_back(filename); outputTypes["summary"].push_back(filename); + + ofstream out; + m->openOutputFile(filename, out); + + out << "group\tmodel\testimate\tlci\tuci" << endl; + + ifstream in; + m->openInputFile(file1, in); + + if (!in.eof()) { + + string header = m->getline(in); m->gobble(in); + + int pos = header.find("Total Number of Observed Species ="); + cout << pos << '\t' << header.length() << endl; exit(1); + if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); } + else { + //pos will be the position of the T in total, so we want to count to the position of = + pos += 34; + char c=header[pos]; + string numString = ""; + while (c != ','){ + if (c != ' ') { + numString += c; + } + } + + int numOtus; convert(numString, numOtus); + cout << numOtus << endl; + } + } + + in.close(); + out.close(); + + return filename; + + } + catch(exception& e) { + m->errorOut(e, "CatchAllCommand", "createSummaryFile"); + exit(1); + } +} /**************************************************************************************/ diff --git a/catchallcommand.h b/catchallcommand.h index d59855b..8795cd7 100644 --- a/catchallcommand.h +++ b/catchallcommand.h @@ -48,6 +48,7 @@ private: map< string, vector > outputTypes; string process(SAbundVector*); + string createSummaryFile(string, string); }; /****************************************************************************/ diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 0edaf3d..818775f 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -52,7 +52,7 @@ GetOTURepCommand::GetOTURepCommand(){ //********************************************************************************************************************** vector GetOTURepCommand::getValidParameters(){ try { - string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; + string Array[] = {"fasta","list","label","name", "group", "weighted","sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -97,7 +97,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { help(); abort = true; } else { //valid paramters for this command - string Array[] = {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; + string Array[] = {"fasta","list","label","name","weighted", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -241,6 +241,11 @@ GetOTURepCommand::GetOTURepCommand(string option) { string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; } large = m->isTrue(temp); + temp = validParameter.validFile(parameters, "weighted", false); if (temp == "not found") { if (namefile == "") { temp = "F"; } else { temp = "t"; } } + weighted = m->isTrue(temp); + + if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; } + temp = validParameter.validFile(parameters, "precision", false); if (temp == "not found") { temp = "100"; } convert(temp, precision); @@ -259,7 +264,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { void GetOTURepCommand::help(){ try { - m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label. The fasta and list parameters are required, as well as phylip or column and name.\n"); + m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label. The fasta and list parameters are required, as well as phylip or column and name.\n"); m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n"); m->mothurOut("The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. \n"); m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n"); @@ -268,6 +273,11 @@ void GetOTURepCommand::help(){ m->mothurOut("The default value for label is all labels in your inputfile.\n"); m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n"); m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM. The default value is false.\n"); + m->mothurOut("The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true. The default value is false with no namesfile and true when a name file is provided.\n"); + m->mothurOut("The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n"); + m->mothurOut("For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n"); + m->mothurOut("For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n"); + m->mothurOut("If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n"); m->mothurOut("The group parameter allows you provide a group file.\n"); m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n"); m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n"); @@ -324,6 +334,9 @@ int GetOTURepCommand::execute(){ if (m->control_pressed) { delete readMatrix; return 0; } seqVec[currentCell->row][currentCell->column] = currentCell->dist; } + //add dummy map for unweighted calc + SeqMap dummy; + seqVec.push_back(dummy); delete matrix; delete readMatrix; @@ -356,6 +369,7 @@ int GetOTURepCommand::execute(){ //positions in file where the distances for each sequence begin //rowPositions[1] = position in file where distance related to sequence 1 start. rowPositions = formatMatrix->getRowPositions(); + rowPositions.push_back(-1); //dummy row for unweighted calc delete formatMatrix; delete nameMap; @@ -422,7 +436,9 @@ int GetOTURepCommand::execute(){ if (large) { inRow.close(); remove(distFile.c_str()); } delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; } - + + if (!weighted) { readNamesFile(weighted); } + while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (allLines == 1 || labels.count(list->getLabel()) == 1){ @@ -506,6 +522,8 @@ int GetOTURepCommand::execute(){ delete input; globaldata->ginput = NULL; delete read; + if (!weighted) { nameFileMap.clear(); } + //read fastafile fasta = new FastaMap(); fasta->readFastaFile(fastafile); @@ -573,20 +591,84 @@ void GetOTURepCommand::readNamesFile() { } } //********************************************************************************************************************** +//read names file to find the weighted rep for each bin +void GetOTURepCommand::readNamesFile(bool w) { + try { + vector dupNames; + m->openInputFile(namefile, inNames); + + string name, names, sequence; + + while(inNames){ + inNames >> name; m->gobble(inNames); //read from first column A + inNames >> names; //read from second column A,B,C,D + + dupNames.clear(); + + //parse names into vector + m->splitAtComma(names, dupNames); + + for (int i = 0; i < dupNames.size(); i++) { + nameFileMap[dupNames[i]] = name; + } + + m->gobble(inNames); + } + inNames.close(); + + } + catch(exception& e) { + m->errorOut(e, "GetOTURepCommand", "readNamesFile"); + exit(1); + } +} +//********************************************************************************************************************** string GetOTURepCommand::findRep(vector names) { try{ // if only 1 sequence in bin or processing the "unique" label, then // the first sequence of the OTU is the representative one - if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) { + if ((names.size() == 1) || (list->getLabel() == "unique")) { return names[0]; }else{ vector seqIndex(names.size()); vector max_dist(names.size()); vector total_dist(names.size()); + map::iterator itNameFile; + map::iterator itNameIndex; //fill seqIndex and initialize sums for (size_t i = 0; i < names.size(); i++) { - seqIndex[i] = nameToIndex[names[i]]; + if (weighted) { + seqIndex[i] = nameToIndex[names[i]]; + }else { + if (namefile == "") { + itNameIndex = nameToIndex.find(names[i]); + + if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique + if (large) { seqIndex[i] = (rowPositions.size()-1); } + else { seqIndex[i] = (seqVec.size()-1); } + }else { + seqIndex[i] = itNameIndex->second; + } + + }else { + itNameFile = nameFileMap.find(names[i]); + + if (itNameFile == nameFileMap.end()) { + m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; + }else{ + string name1 = itNameFile->first; + string name2 = itNameFile->second; + + if (name1 == name2) { //then you are unique so add your real dists + seqIndex[i] = nameToIndex[names[i]]; + }else { //add dummy + if (large) { seqIndex[i] = (rowPositions.size()-1); } + else { seqIndex[i] = (seqVec.size()-1); } + } + } + } + } max_dist[i] = 0.0; total_dist[i] = 0.0; } diff --git a/getoturepcommand.h b/getoturepcommand.h index 874e43c..7a17c9d 100644 --- a/getoturepcommand.h +++ b/getoturepcommand.h @@ -61,9 +61,10 @@ private: string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, columnfile, distFile, format, outputDir, groups; ofstream out; ifstream in, inNames, inRow; - bool abort, allLines, groupError, large; + bool abort, allLines, groupError, large, weighted; set labels; //holds labels to be used map nameToIndex; //maps sequence name to index in sparsematrix + map nameFileMap; vector outputNames, Groups; map outputNameFiles; float cutoff; @@ -74,6 +75,7 @@ private: map > outputTypes; void readNamesFile(); + void readNamesFile(bool); int process(ListVector*); SeqMap getMap(int); string findRep(vector); // returns the name of the "representative" sequence of given bin or subset of a bin, for groups diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 4c9baee..21c7032 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -23,7 +23,7 @@ vector IndicatorCommand::getValidParameters(){ //********************************************************************************************************************** vector IndicatorCommand::getRequiredParameters(){ try { - string Array[] = {"label","tree"}; + string Array[] = {"tree"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); return myArray; } @@ -139,15 +139,12 @@ IndicatorCommand::IndicatorCommand(string option) { else { inputFileName = relabundfile; } groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; pickedGroups = false; } - else { - pickedGroups = true; - m->splitAtDash(groups, Groups); - globaldata->Groups = Groups; - } + if (groups == "not found") { groups = ""; Groups.push_back("all"); } + else { m->splitAtDash(groups, Groups); } + globaldata->Groups = Groups; label = validParameter.validFile(parameters, "label", false); - if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; } + if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true; } @@ -165,9 +162,9 @@ IndicatorCommand::IndicatorCommand(string option) { void IndicatorCommand::help(){ try { m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n"); - m->mothurOut("The new tree contains labels at each internal node. The label is the OTU number of the indicator OTU.\n"); + m->mothurOut("The new tree contains labels at each internal node. The label is the node number so you can relate the tree to the summary file.\n"); m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n"); - m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree and label parameter are required as well as either shared or relabund.\n"); + m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree parameter is required as well as either shared or relabund.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed. The groups may be entered separated by dashes.\n"); m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n"); m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n"); @@ -252,17 +249,13 @@ int IndicatorCommand::execute(){ /***************************************************/ Tree* outputTree = new Tree(globaldata->Groups.size()); - if (pickedGroups) { - outputTree->getSubTree(T[0], globaldata->Groups); - outputTree->assembleTree(); - }else{ - outputTree->getCopy(T[0]); - outputTree->assembleTree(); - } - + outputTree->getSubTree(T[0], globaldata->Groups); + outputTree->assembleTree(); + //no longer need original tree, we have output tree to use and label for (int i = 0; i < T.size(); i++) { delete T[i]; } globaldata->gTree.clear(); + if (m->control_pressed) { if (sharedfile != "") { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } else { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } } @@ -299,6 +292,7 @@ int IndicatorCommand::execute(){ //report all otu values to file int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ try { + string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(inputFileName); } string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary"; @@ -306,6 +300,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ ofstream out; m->openOutputFile(outputFileName, out); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); out << "Node\tOTU#\tIndVal" << endl; string treeOutputDir = outputDir; @@ -351,8 +346,25 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //you are valid if you are not one of my descendants //AND your distToLeaf is <= mine //AND your distToLeaf is >= my smallest childs - //AND you were not added as part of a larger grouping + //AND you were not added as part of a larger groupings + set groupsAlreadyAdded; + //create a grouping with my grouping + vector subset; + int count = 0; + int doneCount = nodeToDescendants[i].size(); + for (int k = 0; k < lookup.size(); k++) { + //is this descendant of i + if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) { + subset.push_back(lookup[k]); + groupsAlreadyAdded.insert(lookup[k]->getGroup()); + count++; + } + if (count == doneCount) { break; } //quit once you get the rabunds for this grouping + } + if (subset.size() != 0) { groupings.push_back(subset); } + + for (int j = (T->getNumNodes()-1); j >= 0; j--) { if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { vector subset; @@ -373,7 +385,10 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ } } - if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + if (groupsAlreadyAdded.size() != lookup.size()) { cout << i << '\t' << groupsAlreadyAdded.size() << '\t' << lookup.size() << endl; m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); } + for (int k = 0; k < lookup.size(); k++) { + if (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0) { cout << lookup[k]->getGroup() << endl; } + } indicatorValues = getValues(groupings); @@ -385,7 +400,23 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ //AND your distToLeaf is <= mine //AND your distToLeaf is >= my smallest childs //AND you were not added as part of a larger grouping + set groupsAlreadyAdded; + //create a grouping with my grouping + vector subset; + int count = 0; + int doneCount = nodeToDescendants[i].size(); + for (int k = 0; k < lookupFloat.size(); k++) { + //is this descendant of i + if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) { + subset.push_back(lookupFloat[k]); + groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); + count++; + } + if (count == doneCount) { break; } //quit once you get the rabunds for this grouping + } + if (subset.size() != 0) { groupings.push_back(subset); } + for (int j = (T->getNumNodes()-1); j >= 0; j--) { if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) { vector subset; @@ -416,28 +447,15 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){ /******************************************************/ //output indicator values to table form + label tree // /*****************************************************/ - vector indicatorOTUs; - float largestValue = 0.0; for (int j = 0; j < indicatorValues.size(); j++) { if (m->control_pressed) { out.close(); return 0; } out << (i+1) << '\t' << (j+1) << '\t' << indicatorValues[j] << endl; - - //show no favortism - if (indicatorValues[j] > largestValue) { - largestValue = indicatorValues[j]; - indicatorOTUs.clear(); - indicatorOTUs.push_back(j+1); - }else if (indicatorValues[j] == largestValue) { - indicatorOTUs.push_back(j+1); - } - - random_shuffle(indicatorOTUs.begin(), indicatorOTUs.end()); - - T->tree[i].setLabel(indicatorOTUs[0]); } + T->tree[i].setLabel((i+1)); + } out.close(); @@ -580,26 +598,33 @@ map IndicatorCommand::getLengthToLeaf(Tree*& T){ if (lc == -1) { // you are a leaf //if you are a leaf set you priliminary length to 1.0, this may adjust later T->tree[i].setBranchLength(1.0); - dists[i] = 0.0; + dists[i] = 1.0; }else{ // you are an internal node //look at your children's length to leaf float ldist = dists[lc]; float rdist = dists[rc]; - float greater; - if (rdist > greater) { greater = rdist; } - else { greater = ldist; } + float greater = ldist; + if (rdist > greater) { greater = rdist; dists[i] = ldist; } + else { dists[i] = rdist; } //branch length = difference + 1 T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0)); T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0)); - - dists[i] = dists[lc] + (abs(ldist-greater) + 1.0); } }else{ - if (lc == -1) { dists[i] = 0.0; } - else { dists[i] = dists[lc] + T->tree[lc].getBranchLength(); } + if (lc == -1) { dists[i] = T->tree[i].getBranchLength(); } + else { //smaller of my two children distances plus my branch length + //look at your children's length to leaf + float ldist = dists[lc]; + float rdist = dists[rc]; + + float smaller = ldist; + if (rdist < smaller) { smaller = rdist; } + + dists[i] = smaller + T->tree[i].getBranchLength(); + } } } @@ -634,6 +659,8 @@ set IndicatorCommand::getDescendantList(Tree*& T, int i, map::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) { nodes[i].insert(*itNum); } + //you are your own descendant + nodes[i].insert(i); } return names; @@ -650,6 +677,8 @@ int IndicatorCommand::getShared(){ lookup = input->getSharedRAbundVectors(); string lastLabel = lookup[0]->getLabel(); + if (label == "") { label = lastLabel; delete input; return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; @@ -724,6 +753,8 @@ int IndicatorCommand::getSharedFloat(){ lookupFloat = input->getSharedRAbundFloatVectors(); string lastLabel = lookupFloat[0]->getLabel(); + if (label == "") { label = lastLabel; delete input; return 0; } + //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. set labels; labels.insert(label); set processedLabels; diff --git a/indicatorcommand.h b/indicatorcommand.h index 0d924fd..1a41ffd 100644 --- a/indicatorcommand.h +++ b/indicatorcommand.h @@ -35,7 +35,7 @@ private: ReadTree* read; TreeMap* treeMap; string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir; - bool abort, pickedGroups; + bool abort; vector outputNames, Groups; map > outputTypes; vector lookup; diff --git a/mothur b/mothur index 39270c0..a9ab78c 100755 Binary files a/mothur and b/mothur differ diff --git a/summarycommand.cpp b/summarycommand.cpp index 1e0e8db..e67659f 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -550,6 +550,7 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector< files[outputNames[i]] = thisFilesLines; temp.close(); + remove(outputNames[i].c_str()); } //output label line to new file diff --git a/tree.cpp b/tree.cpp index 2a22cf3..114b4de 100644 --- a/tree.cpp +++ b/tree.cpp @@ -270,7 +270,7 @@ void Tree::getSubTree(Tree* copy, vector Groups) { //initialize leaf nodes if (i <= (numLeaves-1)) { tree[i].setName(Groups[i]); - + //save group info string group = globaldata->gTreemap->getGroup(Groups[i]); vector tempGroups; tempGroups.push_back(group); @@ -301,6 +301,7 @@ void Tree::getSubTree(Tree* copy, vector Groups) { int parent = copy->tree[i].getParent(); if (parent != -1) { + if (m->inUsersGroups(copy->tree[i].getName(), Groups)) { //find my siblings name int parentRC = copy->tree[parent].getRChild(); @@ -361,7 +362,7 @@ void Tree::getSubTree(Tree* copy, vector Groups) { //so set our parents sib to our great-grandparent int parent = copy->tree[i].getParent(); int grandparent = copy->tree[parent].getParent(); - + int parentsSibIndex; if (grandparent != -1) { int greatgrandparent = copy->tree[grandparent].getParent(); int greatgrandparentLC = copy->tree[greatgrandparent].getLChild(); @@ -370,8 +371,8 @@ void Tree::getSubTree(Tree* copy, vector Groups) { int grandparentLC = copy->tree[grandparent].getLChild(); int grandparentRC = copy->tree[grandparent].getRChild(); - int parentsSibIndex = grandparentLC; - if (grandparentRC == parent) { parentsSibIndex = grandparentLC; } + parentsSibIndex = grandparentLC; + if (grandparentLC == parent) { parentsSibIndex = grandparentRC; } //whichever of my greatgrandparents children was my grandparent if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; } @@ -401,7 +402,6 @@ void Tree::getSubTree(Tree* copy, vector Groups) { int nextSpot = numLeaves; populateNewTree(copy->tree, root, nextSpot); - } catch(exception& e) { m->errorOut(e, "Tree", "getCopy"); @@ -415,17 +415,15 @@ int Tree::populateNewTree(vector& oldtree, int node, int& index) { if (oldtree[node].getLChild() != -1) { int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index); int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index); - + tree[index].setChildren(lc, rc); - index++; + tree[rc].setParent(index); + tree[lc].setParent(index); - return (index-1); + return (index++); }else { //you are a leaf int indexInNewTree = globaldata->gTreemap->getIndex(oldtree[node].getName()); - - tree[indexInNewTree].setParent(index); return indexInNewTree; - } } catch(exception& e) { @@ -895,7 +893,69 @@ try { exit(1); } } - +/*****************************************************************/ +void Tree::printBranch(int node, ostream& out, string mode, vector& theseNodes) { + try { + + // you are not a leaf + if (theseNodes[node].getLChild() != -1) { + out << "("; + printBranch(theseNodes[node].getLChild(), out, mode); + out << ","; + printBranch(theseNodes[node].getRChild(), out, mode); + out << ")"; + if (mode == "branch") { + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + } + }else { //you are a leaf + string leafGroup = globaldata->gTreemap->getGroup(theseNodes[node].getName()); + + if (mode == "branch") { + out << leafGroup; + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + }else if (mode == "boot") { + out << leafGroup; + //if there is a label then print it + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + }else if (mode == "both") { + out << theseNodes[node].getName(); + if (theseNodes[node].getLabel() != -1) { + out << theseNodes[node].getLabel(); + } + //if there is a branch length then print it + if (theseNodes[node].getBranchLength() != -1) { + out << ":" << theseNodes[node].getBranchLength(); + } + } + } + + } + catch(exception& e) { + m->errorOut(e, "Tree", "printBranch"); + exit(1); + } +} /*****************************************************************/ void Tree::printTree() { diff --git a/tree.h b/tree.h index 102d565..0b316c3 100644 --- a/tree.h +++ b/tree.h @@ -68,6 +68,7 @@ private: //only takes names from the first tree in the tree file and assumes that all trees use the same names. int readTreeString(ifstream&); int populateNewTree(vector&, int, int&); + void printBranch(int, ostream&, string, vector&); MothurOut* m;