fixed indicator command bug

author westcott <westcott>

Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)

committer westcott <westcott>

Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)
author westcott <westcott>
Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)
committer westcott <westcott>
Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)
diff --git a/catchallcommand.cpp b/catchallcommand.cpp

index 8f2841358dd4bbb91b69c2a530b6332b8fc2d131..efe99b007d6cd37e823ccca91937f4c38cd26c1b 100644 (file)
--- a/catchallcommand.cpp
+++ b/catchallcommand.cpp
@@ -28,6 +28,7 @@ CatchAllCommand::CatchAllCommand(){
                 //initialize outputTypes
                 vector<string> tempOutNames;
                 outputTypes["csv"] = tempOutNames;
+               outputTypes["summary"] = tempOutNames;
         }
         catch(exception& e) {
                 m->errorOut(e, "CatchAllCommand", "CatchAllCommand");
@@ -86,6 +87,7 @@ CatchAllCommand::CatchAllCommand(string option)  {
                         //initialize outputTypes
                         vector<string> tempOutNames;
                         outputTypes["csv"] = tempOutNames;
+                       outputTypes["summary"] = tempOutNames;
                         
                         //if the user changes the input directory command factory will send this info to us in the output parameter 
                         string inputDir = validParameter.validFile(parameters, "inputdir", false);              
@@ -203,6 +205,7 @@ int CatchAllCommand::execute() {
                                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
                                         outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
                                 
+                                       createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel());
                                                                                 
                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete read;  delete input; globaldata->ginput = NULL; delete sabund;  return 0; }
  
@@ -238,6 +241,8 @@ int CatchAllCommand::execute() {
                                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
                                         outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");
                                 
+                                       createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel());
+                               
                                         if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {remove(outputNames[i].c_str()); } delete read;  delete input; globaldata->ginput = NULL; delete sabund;  return 0; }
  
                                         processedLabels.insert(sabund->getLabel());
@@ -291,7 +296,9 @@ int CatchAllCommand::execute() {
                         outputNames.push_back(filename + "_Analysis.csv"); outputTypes["csv"].push_back(filename + "_Analysis.csv");
                         outputNames.push_back(filename + "_BestModelsAnalysis.csv"); outputTypes["csv"].push_back(filename + "_BestModelsAnalysis.csv");
                         outputNames.push_back(filename + "_BestModelsFits.csv"); outputTypes["csv"].push_back(filename + "_BestModelsFits.csv");
-                       outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");                        
+                       outputNames.push_back(filename + "_BubblePlot.csv"); outputTypes["csv"].push_back(filename + "_BubblePlot.csv");        
+                       
+                       createSummaryFile(filename + "_BestModelsAnalysis.csv", sabund->getLabel());
                         
                         delete sabund;
                 }
@@ -340,6 +347,55 @@ string CatchAllCommand::process(SAbundVector* sabund) {
                 exit(1);
         }
  }
+//**********************************************************************************************************************
+string CatchAllCommand::createSummaryFile(string file1, string label) {
+       try {
+               string filename = outputDir + m->getRootName(m->getSimpleName(sabundfile)) + label + ".catchall.summary";
+               filename = m->getFullPathName(filename);
+               outputNames.push_back(filename); outputTypes["summary"].push_back(filename);
+               
+               ofstream out;
+               m->openOutputFile(filename, out);
+               
+               out << "group\tmodel\testimate\tlci\tuci" << endl;
+               
+               ifstream in;
+               m->openInputFile(file1, in);
+               
+               if (!in.eof()) {
+                       
+                       string header = m->getline(in); m->gobble(in);
+                       
+                       int pos = header.find("Total Number of Observed Species =");
+                       cout << pos << '\t' << header.length() << endl; exit(1);
+                       if (pos == string::npos) { m->mothurOut("[ERROR]: cannot parse " + file1); m->mothurOutEndLine(); }
+                       else {
+                               //pos will be the position of the T in total, so we want to count to the position of =
+                               pos += 34;
+                               char c=header[pos];
+                               string numString = "";
+                               while (c != ','){
+                                       if (c != ' ') {
+                                               numString += c;
+                                       }
+                               }
+                               
+                               int numOtus; convert(numString, numOtus);
+                               cout << numOtus << endl;
+                       }
+               }
+               
+               in.close();
+               out.close();
+               
+               return filename;
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "CatchAllCommand", "createSummaryFile");
+               exit(1);
+       }
+}
  /**************************************************************************************/
  
  
diff --git a/catchallcommand.h b/catchallcommand.h

index d59855b240442d541699970cd0d75a9ef5cde452..8795cd7512284c32b96100d814e2febfecec9504 100644 (file)
--- a/catchallcommand.h
+++ b/catchallcommand.h
@@ -48,6 +48,7 @@ private:
         map< string, vector<string> > outputTypes;
         
         string process(SAbundVector*);
+       string createSummaryFile(string, string); 
  };
  
  /****************************************************************************/
diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp

index 0edaf3dda753e1b590d96f07880d8dfe970da463..818775fdd7e24678545750fcfad6596efdd666e6 100644 (file)
--- a/getoturepcommand.cpp
+++ b/getoturepcommand.cpp
@@ -52,7 +52,7 @@ GetOTURepCommand::GetOTURepCommand(){
  //**********************************************************************************************************************
  vector<string> GetOTURepCommand::getValidParameters(){ 
         try {
-               string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
+               string Array[] =  {"fasta","list","label","name", "group", "weighted","sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                 return myArray;
         }
@@ -97,7 +97,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                         help(); abort = true;
                 } else {
                         //valid paramters for this command
-                       string Array[] =  {"fasta","list","label","name", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
+                       string Array[] =  {"fasta","list","label","name","weighted", "group", "sorted", "phylip","column","large","cutoff","precision","groups","outputdir","inputdir"};
                         vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                         
                         OptionParser parser(option);
@@ -241,6 +241,11 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
                         string temp = validParameter.validFile(parameters, "large", false);             if (temp == "not found") {      temp = "F";     }
                         large = m->isTrue(temp);
                         
+                       temp = validParameter.validFile(parameters, "weighted", false);         if (temp == "not found") {      if (namefile == "") { temp = "F"; } else { temp = "t"; }        }
+                       weighted = m->isTrue(temp);
+                       
+                       if ((weighted) && (namefile == "")) { m->mothurOut("You cannot set weighted to true unless you provide a namesfile."); m->mothurOutEndLine(); abort = true; }
+                       
                         temp = validParameter.validFile(parameters, "precision", false);                        if (temp == "not found") { temp = "100"; }
                         convert(temp, precision); 
                         
@@ -259,7 +264,7 @@ GetOTURepCommand::GetOTURepCommand(string option)  {
  
  void GetOTURepCommand::help(){
         try {
-               m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
+               m->mothurOut("The get.oturep command parameters are phylip, column, list, fasta, name, group, large, weighted, cutoff, precision, groups, sorted and label.  The fasta and list parameters are required, as well as phylip or column and name.\n");
                 m->mothurOut("The label parameter allows you to select what distance levels you would like a output files created for, and is separated by dashes.\n");
                 m->mothurOut("The phylip or column parameter is required, but only one may be used.  If you use a column file the name filename is required. \n");
                 m->mothurOut("If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed.\n");
@@ -268,6 +273,11 @@ void GetOTURepCommand::help(){
                 m->mothurOut("The default value for label is all labels in your inputfile.\n");
                 m->mothurOut("The sorted parameter allows you to indicate you want the output sorted. You can sort by sequence name, bin number, bin size or group. The default is no sorting, but your options are name, number, size, or group.\n");
                 m->mothurOut("The large parameter allows you to indicate that your distance matrix is too large to fit in RAM.  The default value is false.\n");
+               m->mothurOut("The weighted parameter allows you to indicate that want to find the weighted representative. You must provide a namesfile to set weighted to true.  The default value is false with no namesfile and true when a name file is provided.\n");
+               m->mothurOut("The representative is found by selecting the sequence that has the smallest total distance to all other sequences in the OTU. If a tie occurs the smallest average distance is used.\n");
+               m->mothurOut("For weighted = false, mothur assumes the distance file contains only unique sequences, the list file may contain all sequences, but only the uniques are considered to become the representative. If your distance file contains all the sequences it would become weighted=true.\n");
+               m->mothurOut("For weighted = true, mothur assumes the distance file contains only unique sequences, the list file must contain all sequences, all sequences are considered to become the representative, but unique name will be used in the output for consistency.\n");
+               m->mothurOut("If your distance file contains all the sequence and you do not provide a name file, the weighted representative will be given, unless your listfile is unique. If you provide a namefile, then you can select weighted or unweighted.\n");
                 m->mothurOut("The group parameter allows you provide a group file.\n");
                 m->mothurOut("The groups parameter allows you to indicate that you want representative sequences for each group specified for each OTU, group name should be separated by dashes. ex. groups=A-B-C.\n");
                 m->mothurOut("The get.oturep command outputs a .fastarep and .rep.names file for each distance you specify, selecting one OTU representative for each bin.\n");
@@ -324,6 +334,9 @@ int GetOTURepCommand::execute(){
                                 if (m->control_pressed) { delete readMatrix; return 0; }
                                 seqVec[currentCell->row][currentCell->column] = currentCell->dist;
                         }
+                       //add dummy map for unweighted calc
+                       SeqMap dummy;
+                       seqVec.push_back(dummy);
                         
                         delete matrix;
                         delete readMatrix;
@@ -356,6 +369,7 @@ int GetOTURepCommand::execute(){
                         //positions in file where the distances for each sequence begin
                         //rowPositions[1] = position in file where distance related to sequence 1 start.
                         rowPositions = formatMatrix->getRowPositions();
+                       rowPositions.push_back(-1); //dummy row for unweighted calc
                         
                         delete formatMatrix;
                         delete nameMap;
@@ -422,7 +436,9 @@ int GetOTURepCommand::execute(){
                         if (large) {  inRow.close(); remove(distFile.c_str());  }
                         delete read; delete input; delete list; globaldata->gListVector = NULL; return 0; 
                 }
-       
+               
+               if (!weighted) { readNamesFile(weighted); }
+               
                 while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
                         
                         if (allLines == 1 || labels.count(list->getLabel()) == 1){
@@ -506,6 +522,8 @@ int GetOTURepCommand::execute(){
                 delete input;  globaldata->ginput = NULL;
                 delete read;
                 
+               if (!weighted) { nameFileMap.clear(); }
+               
                 //read fastafile
                 fasta = new FastaMap();
                 fasta->readFastaFile(fastafile);
@@ -573,20 +591,84 @@ void GetOTURepCommand::readNamesFile() {
         }
  }
  //**********************************************************************************************************************
+//read names file to find the weighted rep for each bin
+void GetOTURepCommand::readNamesFile(bool w) {
+       try {
+               vector<string> dupNames;
+               m->openInputFile(namefile, inNames);
+               
+               string name, names, sequence;
+               
+               while(inNames){
+                       inNames >> name;        m->gobble(inNames);             //read from first column  A
+                       inNames >> names;                                                       //read from second column  A,B,C,D
+                       
+                       dupNames.clear();
+                       
+                       //parse names into vector
+                       m->splitAtComma(names, dupNames);
+                       
+                       for (int i = 0; i < dupNames.size(); i++) {
+                               nameFileMap[dupNames[i]] = name;
+                       }
+                       
+                       m->gobble(inNames);
+               }
+               inNames.close();
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "GetOTURepCommand", "readNamesFile");
+               exit(1);
+       }
+}
+//**********************************************************************************************************************
  string GetOTURepCommand::findRep(vector<string> names) {
         try{
                 // if only 1 sequence in bin or processing the "unique" label, then 
                 // the first sequence of the OTU is the representative one
-               if ((names.size() == 2) || (names.size() == 1) || (list->getLabel() == "unique")) {
+               if ((names.size() == 1) || (list->getLabel() == "unique")) {
                         return names[0];
                 }else{
                         vector<int> seqIndex(names.size());
                         vector<float> max_dist(names.size());
                         vector<float> total_dist(names.size());
+                       map<string, string>::iterator itNameFile;
+                       map<string, int>::iterator itNameIndex;
  
                         //fill seqIndex and initialize sums
                         for (size_t i = 0; i < names.size(); i++) {
-                               seqIndex[i] = nameToIndex[names[i]];
+                               if (weighted) {
+                                       seqIndex[i] = nameToIndex[names[i]];
+                               }else { 
+                                       if (namefile == "") {
+                                               itNameIndex = nameToIndex.find(names[i]);
+                                               
+                                               if (itNameIndex == nameToIndex.end()) { // you are not in the distance file and no namesfile, then assume you are not unique
+                                                       if (large) {  seqIndex[i] = (rowPositions.size()-1); }
+                                                       else {  seqIndex[i] = (seqVec.size()-1); }
+                                               }else {
+                                                       seqIndex[i] = itNameIndex->second;
+                                               }
+                                               
+                                       }else {
+                                               itNameFile = nameFileMap.find(names[i]);
+                                               
+                                               if (itNameFile == nameFileMap.end()) {
+                                                       m->mothurOut("[ERROR]: " + names[i] + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; 
+                                               }else{
+                                                       string name1 = itNameFile->first;
+                                                       string name2 = itNameFile->second;
+                                                       
+                                                       if (name1 == name2) { //then you are unique so add your real dists
+                                                               seqIndex[i] = nameToIndex[names[i]];
+                                                       }else { //add dummy
+                                                               if (large) {  seqIndex[i] = (rowPositions.size()-1); }
+                                                               else {  seqIndex[i] = (seqVec.size()-1); }
+                                                       }
+                                               }
+                                       }
+                               }
                                 max_dist[i] = 0.0;
                                 total_dist[i] = 0.0;
                         }
diff --git a/getoturepcommand.h b/getoturepcommand.h

index 874e43ce5a8f45178a073b9d8e8e00fa8852deb0..7a17c9de22a4c08b8d44a6e59ea3c6f07f18430a 100644 (file)
--- a/getoturepcommand.h
+++ b/getoturepcommand.h
@@ -61,9 +61,10 @@ private:
         string filename, fastafile, listfile, namefile, groupfile, label, sorted, phylipfile, columnfile, distFile, format, outputDir, groups;
         ofstream out;
         ifstream in, inNames, inRow;
-       bool abort, allLines, groupError, large;
+       bool abort, allLines, groupError, large, weighted;
         set<string> labels; //holds labels to be used
         map<string, int> nameToIndex;  //maps sequence name to index in sparsematrix
+       map<string, string> nameFileMap;
         vector<string> outputNames, Groups;
         map<string, string> outputNameFiles;
         float cutoff;
@@ -74,6 +75,7 @@ private:
         map<string, vector<string> > outputTypes;
  
         void readNamesFile();
+       void readNamesFile(bool);
         int process(ListVector*);
         SeqMap getMap(int);
         string findRep(vector<string>);         // returns the name of the "representative" sequence of given bin or subset of a bin, for groups
diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp

index 4c9baee4697e8cabbe313627196b780d8f26fec3..21c70324611794f0574eb07cd97f2d10980a8bef 100644 (file)
--- a/indicatorcommand.cpp
+++ b/indicatorcommand.cpp
@@ -23,7 +23,7 @@ vector<string> IndicatorCommand::getValidParameters(){
  //**********************************************************************************************************************
  vector<string> IndicatorCommand::getRequiredParameters(){      
         try {
-               string Array[] =  {"label","tree"};
+               string Array[] =  {"tree"};
                 vector<string> myArray (Array, Array+(sizeof(Array)/sizeof(string)));
                 return myArray;
         }
@@ -139,15 +139,12 @@ IndicatorCommand::IndicatorCommand(string option)  {
                         else { inputFileName = relabundfile; }
                         
                         groups = validParameter.validFile(parameters, "groups", false);                 
-                       if (groups == "not found") { groups = ""; pickedGroups = false; }
-                       else { 
-                               pickedGroups = true;
-                               m->splitAtDash(groups, Groups);
-                               globaldata->Groups = Groups;
-                       }                       
+                       if (groups == "not found") { groups = "";  Groups.push_back("all"); }
+                       else { m->splitAtDash(groups, Groups);  }                       
+                       globaldata->Groups = Groups;
                         
                         label = validParameter.validFile(parameters, "label", false);                   
-                       if (label == "not found") { label = ""; m->mothurOut("You must provide a label to process."); m->mothurOutEndLine(); abort = true; }    
+                       if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }  
                         
                         if ((relabundfile == "") && (sharedfile == "")) { m->mothurOut("You must provide either a shared or relabund file."); m->mothurOutEndLine(); abort = true;  }
                         
@@ -165,9 +162,9 @@ IndicatorCommand::IndicatorCommand(string option)  {
  void IndicatorCommand::help(){
         try {
                 m->mothurOut("The indicator command reads a shared or relabund file and a tree file, and outputs a .indicator.tre and .indicator.summary file. \n");
-               m->mothurOut("The new tree contains labels at each internal node.  The label is the OTU number of the indicator OTU.\n");
+               m->mothurOut("The new tree contains labels at each internal node.  The label is the node number so you can relate the tree to the summary file.\n");
                 m->mothurOut("The summary file lists the indicator value for each OTU for each node.\n");
-               m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree and label parameter are required as well as either shared or relabund.\n");
+               m->mothurOut("The indicator command parameters are tree, groups, shared, relabund and label. The tree parameter is required as well as either shared or relabund.\n");
                 m->mothurOut("The groups parameter allows you to specify which of the groups in your shared or relabund you would like analyzed.  The groups may be entered separated by dashes.\n");
                 m->mothurOut("The label parameter indicates at what distance your tree relates to the shared or relabund.\n");
                 m->mothurOut("The indicator command should be used in the following format: indicator(tree=test.tre, shared=test.shared, label=0.03)\n");
@@ -252,17 +249,13 @@ int IndicatorCommand::execute(){
                 /***************************************************/
                 Tree* outputTree = new Tree(globaldata->Groups.size()); 
                 
-               if (pickedGroups) {
-                       outputTree->getSubTree(T[0], globaldata->Groups);
-                       outputTree->assembleTree();
-               }else{
-                       outputTree->getCopy(T[0]);
-                       outputTree->assembleTree();
-               }
-               
+               outputTree->getSubTree(T[0], globaldata->Groups);
+               outputTree->assembleTree();
+                       
                 //no longer need original tree, we have output tree to use and label
                 for (int i = 0; i < T.size(); i++) {  delete T[i];  } globaldata->gTree.clear();
                 
+                               
                 if (m->control_pressed) {  
                         if (sharedfile != "") {  for (int i = 0; i < lookup.size(); i++) {  delete lookup[i];  } }
                         else { for (int i = 0; i < lookupFloat.size(); i++) {  delete lookupFloat[i];  } }
@@ -299,6 +292,7 @@ int IndicatorCommand::execute(){
  //report all otu values to file
  int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
         try {
+               
                 string thisOutputDir = outputDir;
                 if (outputDir == "") {  thisOutputDir += m->hasPath(inputFileName);  }
                 string outputFileName = thisOutputDir + m->getRootName(m->getSimpleName(inputFileName)) + "indicator.summary";
@@ -306,6 +300,7 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                 
                 ofstream out;
                 m->openOutputFile(outputFileName, out);
+               out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                 out << "Node\tOTU#\tIndVal" << endl;
                 
                 string treeOutputDir = outputDir;
@@ -351,8 +346,25 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                 //you are valid if you are not one of my descendants
                                 //AND your distToLeaf is <= mine
                                 //AND your distToLeaf is >= my smallest childs
-                               //AND you were not added as part of a larger grouping
+                               //AND you were not added as part of a larger groupings
+                               
                                 set<string> groupsAlreadyAdded;
+                               //create a grouping with my grouping
+                               vector<SharedRAbundVector*> subset;
+                               int count = 0;
+                               int doneCount = nodeToDescendants[i].size();
+                               for (int k = 0; k < lookup.size(); k++) {
+                                       //is this descendant of i
+                                       if ((nodeToDescendants[i].count(lookup[k]->getGroup()) != 0)) { 
+                                               subset.push_back(lookup[k]);
+                                               groupsAlreadyAdded.insert(lookup[k]->getGroup());
+                                               count++;
+                                       }
+                                       if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
+                               }
+                               if (subset.size() != 0) { groupings.push_back(subset); }
+                               
+                               
                                 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
                                         if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) {
                                                 vector<SharedRAbundVector*> subset;
@@ -373,7 +385,10 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                         }
                                 }
                                 
-                               if (groupsAlreadyAdded.size() != lookup.size()) { m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
+                               if (groupsAlreadyAdded.size() != lookup.size()) { cout << i << '\t' << groupsAlreadyAdded.size() << '\t' << lookup.size() << endl; m->mothurOut("[ERROR]: could not make proper groupings."); m->mothurOutEndLine(); }
+                               for (int k = 0; k < lookup.size(); k++) {
+                                       if (groupsAlreadyAdded.count(lookup[k]->getGroup()) == 0) { cout << lookup[k]->getGroup() << endl; }
+                               }
                                 
                                 indicatorValues = getValues(groupings);
                                 
@@ -385,7 +400,23 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                                 //AND your distToLeaf is <= mine
                                 //AND your distToLeaf is >= my smallest childs
                                 //AND you were not added as part of a larger grouping
+                               
                                 set<string> groupsAlreadyAdded;
+                               //create a grouping with my grouping
+                               vector<SharedRAbundFloatVector*> subset;
+                               int count = 0;
+                               int doneCount = nodeToDescendants[i].size();
+                               for (int k = 0; k < lookupFloat.size(); k++) {
+                                       //is this descendant of i
+                                       if ((nodeToDescendants[i].count(lookupFloat[k]->getGroup()) != 0)) { 
+                                               subset.push_back(lookupFloat[k]);
+                                               groupsAlreadyAdded.insert(lookupFloat[k]->getGroup());
+                                               count++;
+                                       }
+                                       if (count == doneCount) { break; } //quit once you get the rabunds for this grouping
+                               }
+                               if (subset.size() != 0) { groupings.push_back(subset); }
+                               
                                 for (int j = (T->getNumNodes()-1); j >= 0; j--) {
                                         if ((descendantNodes[i].count(j) == 0) && (distToLeaf[j] <= distToLeaf[i]) && ((distToLeaf[j] >= distToLeaf[T->tree[i].getLChild()]) || (distToLeaf[j] >= distToLeaf[T->tree[i].getRChild()]))) {
                                                 vector<SharedRAbundFloatVector*> subset;
@@ -416,28 +447,15 @@ int IndicatorCommand::GetIndicatorSpecies(Tree*& T){
                         /******************************************************/
                         //output indicator values to table form + label tree  //
                         /*****************************************************/
-                       vector<int> indicatorOTUs;
-                       float largestValue = 0.0;
                         for (int j = 0; j < indicatorValues.size(); j++) {
                                 
                                 if (m->control_pressed) { out.close(); return 0; }
                                 
                                 out << (i+1) << '\t' << (j+1) << '\t' << indicatorValues[j] << endl;
-                               
-                               //show no favortism
-                               if (indicatorValues[j] > largestValue) { 
-                                       largestValue = indicatorValues[j];
-                                       indicatorOTUs.clear();
-                                       indicatorOTUs.push_back(j+1);
-                               }else if (indicatorValues[j] == largestValue) { 
-                                       indicatorOTUs.push_back(j+1);
-                               }
-                               
-                               random_shuffle(indicatorOTUs.begin(), indicatorOTUs.end());
-                               
-                               T->tree[i].setLabel(indicatorOTUs[0]);
                         }
                         
+                       T->tree[i].setLabel((i+1));
+                       
                 }
                 out.close();
         
@@ -580,26 +598,33 @@ map<int, float> IndicatorCommand::getLengthToLeaf(Tree*& T){
                                 if (lc == -1) { // you are a leaf
                                         //if you are a leaf set you priliminary length to 1.0, this may adjust later
                                         T->tree[i].setBranchLength(1.0);
-                                       dists[i] = 0.0;
+                                       dists[i] = 1.0;
                                 }else{ // you are an internal node
                                         //look at your children's length to leaf
                                         float ldist = dists[lc];
                                         float rdist = dists[rc];
                                         
-                                       float greater;
-                                       if (rdist > greater) { greater = rdist; }
-                                       else { greater = ldist; }
+                                       float greater = ldist;
+                                       if (rdist > greater) { greater = rdist; dists[i] = ldist; }
+                                       else { dists[i] = rdist; }
                                         
                                         //branch length = difference + 1
                                         T->tree[lc].setBranchLength((abs(ldist-greater) + 1.0));
                                         T->tree[rc].setBranchLength((abs(rdist-greater) + 1.0));
-                                       
-                                       dists[i] = dists[lc] + (abs(ldist-greater) + 1.0);
                                 }
                                 
                         }else{
-                               if (lc == -1) { dists[i] = 0.0; }
-                               else { dists[i] = dists[lc] + T->tree[lc].getBranchLength(); }
+                               if (lc == -1) { dists[i] = T->tree[i].getBranchLength(); }
+                               else { //smaller of my two children distances plus my branch length
+                                       //look at your children's length to leaf
+                                       float ldist = dists[lc];
+                                       float rdist = dists[rc];
+                                       
+                                       float smaller = ldist;
+                                       if (rdist < smaller) { smaller = rdist;  }
+                                       
+                                       dists[i] = smaller + T->tree[i].getBranchLength(); 
+                               }
                         }
                         
                 }
@@ -634,6 +659,8 @@ set<string> IndicatorCommand::getDescendantList(Tree*& T, int i, map<int, set<st
                         for (set<int>::iterator itNum = nodes[rc].begin(); itNum != nodes[rc].end(); itNum++) {
                                 nodes[i].insert(*itNum);
                         }
+                       //you are your own descendant
+                       nodes[i].insert(i);
                 }
                 
                 return names;
@@ -650,6 +677,8 @@ int IndicatorCommand::getShared(){
                 lookup = input->getSharedRAbundVectors();
                 string lastLabel = lookup[0]->getLabel();
                 
+               if (label == "") { label = lastLabel; delete input; return 0; }
+               
                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                 set<string> labels; labels.insert(label);
                 set<string> processedLabels;
@@ -724,6 +753,8 @@ int IndicatorCommand::getSharedFloat(){
                 lookupFloat = input->getSharedRAbundFloatVectors();
                 string lastLabel = lookupFloat[0]->getLabel();
                 
+               if (label == "") { label = lastLabel; delete input; return 0; }
+               
                 //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
                 set<string> labels; labels.insert(label);
                 set<string> processedLabels;
diff --git a/indicatorcommand.h b/indicatorcommand.h

index 0d924fd1ff0a1a17cfa51b6faa5de55dcd3bf186..1a41ffd33c1e0a175ed6560b300ba9469fa20cb8 100644 (file)
--- a/indicatorcommand.h
+++ b/indicatorcommand.h
@@ -35,7 +35,7 @@ private:
         ReadTree* read;
         TreeMap* treeMap;
         string treefile, sharedfile, relabundfile, groups, label, inputFileName, outputDir;
-       bool abort, pickedGroups;
+       bool abort;
         vector<string> outputNames, Groups;
         map<string, vector<string> > outputTypes;
         vector<SharedRAbundVector*> lookup;
diff --git a/mothur b/mothur

index 39270c06604caadaf65f3297a8bb8839064fcbe7..a9ab78c3274abb79502f022b4570f8fc522b8853 100755 (executable)

Binary files a/mothur and b/mothur differ
diff --git a/summarycommand.cpp b/summarycommand.cpp

index 1e0e8dbc552a872df3fe591d15ea378db3316959..e67659f50db215490313062d442336afbd2f25e1 100644 (file)
--- a/summarycommand.cpp
+++ b/summarycommand.cpp
@@ -550,6 +550,7 @@ string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector<
                         files[outputNames[i]] = thisFilesLines;
                         
                         temp.close();
+                       remove(outputNames[i].c_str());
                 }
                 
                 //output label line to new file
diff --git a/tree.cpp b/tree.cpp

index 2a22cf3b952ff217c9e57c5ce98bbb01c5cbacb0..114b4de0d6e2c408479ae8c03b65d2b1300bb9b8 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -270,7 +270,7 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                         //initialize leaf nodes
                         if (i <= (numLeaves-1)) {
                                 tree[i].setName(Groups[i]);
-                                       
+                               
                                 //save group info
                                 string group = globaldata->gTreemap->getGroup(Groups[i]);
                                 vector<string> tempGroups; tempGroups.push_back(group);
@@ -301,6 +301,7 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                         int parent = copy->tree[i].getParent();
                         
                         if (parent != -1) {
+                               
                                 if (m->inUsersGroups(copy->tree[i].getName(), Groups)) {
                                         //find my siblings name
                                         int parentRC = copy->tree[parent].getRChild();
@@ -361,7 +362,7 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                 //so set our parents sib to our great-grandparent
                                                 int parent = copy->tree[i].getParent();
                                                 int grandparent = copy->tree[parent].getParent();
-                                               
+                                               int parentsSibIndex;
                                                 if (grandparent != -1) {
                                                         int greatgrandparent = copy->tree[grandparent].getParent();
                                                         int greatgrandparentLC = copy->tree[greatgrandparent].getLChild();
@@ -370,8 +371,8 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                                                         int grandparentLC = copy->tree[grandparent].getLChild();
                                                         int grandparentRC = copy->tree[grandparent].getRChild();
                                                         
-                                                       int parentsSibIndex = grandparentLC;
-                                                       if (grandparentRC == parent) { parentsSibIndex = grandparentLC; }
+                                                       parentsSibIndex = grandparentLC;
+                                                       if (grandparentLC == parent) { parentsSibIndex = grandparentRC; }
  
                                                         //whichever of my greatgrandparents children was my grandparent
                                                         if (greatgrandparentLC == grandparent) { greatgrandparentLC = parentsSibIndex; }
@@ -401,7 +402,6 @@ void Tree::getSubTree(Tree* copy, vector<string> Groups) {
                 int nextSpot = numLeaves;
                 populateNewTree(copy->tree, root, nextSpot);
                 
-               
         }
         catch(exception& e) {
                 m->errorOut(e, "Tree", "getCopy");
@@ -415,17 +415,15 @@ int Tree::populateNewTree(vector<Node>& oldtree, int node, int& index) {
                 if (oldtree[node].getLChild() != -1) {
                         int rc = populateNewTree(oldtree, oldtree[node].getLChild(), index);
                         int lc = populateNewTree(oldtree, oldtree[node].getRChild(), index);
-                       
+
                         tree[index].setChildren(lc, rc);
-                       index++;
+                       tree[rc].setParent(index);
+                       tree[lc].setParent(index);
                         
-                       return (index-1);
+                       return (index++);
                 }else { //you are a leaf
                         int indexInNewTree = globaldata->gTreemap->getIndex(oldtree[node].getName());
-                       
-                       tree[indexInNewTree].setParent(index);
                         return indexInNewTree;
-                       
                 }
         }
         catch(exception& e) {
@@ -895,7 +893,69 @@ try {
                 exit(1);
         }
  }
-                                                                         
+/*****************************************************************/
+void Tree::printBranch(int node, ostream& out, string mode, vector<Node>& theseNodes) {
+       try {
+               
+               // you are not a leaf
+               if (theseNodes[node].getLChild() != -1) {
+                       out << "(";
+                       printBranch(theseNodes[node].getLChild(), out, mode);
+                       out << ",";
+                       printBranch(theseNodes[node].getRChild(), out, mode);
+                       out << ")";
+                       if (mode == "branch") {
+                               //if there is a branch length then print it
+                               if (theseNodes[node].getBranchLength() != -1) {
+                                       out << ":" << theseNodes[node].getBranchLength();
+                               }
+                       }else if (mode == "boot") {
+                               //if there is a label then print it
+                               if (theseNodes[node].getLabel() != -1) {
+                                       out << theseNodes[node].getLabel();
+                               }
+                       }else if (mode == "both") {
+                               if (theseNodes[node].getLabel() != -1) {
+                                       out << theseNodes[node].getLabel();
+                               }
+                               //if there is a branch length then print it
+                               if (theseNodes[node].getBranchLength() != -1) {
+                                       out << ":" << theseNodes[node].getBranchLength();
+                               }
+                       }
+               }else { //you are a leaf
+                       string leafGroup = globaldata->gTreemap->getGroup(theseNodes[node].getName());
+                       
+                       if (mode == "branch") {
+                               out << leafGroup; 
+                               //if there is a branch length then print it
+                               if (theseNodes[node].getBranchLength() != -1) {
+                                       out << ":" << theseNodes[node].getBranchLength();
+                               }
+                       }else if (mode == "boot") {
+                               out << leafGroup; 
+                               //if there is a label then print it
+                               if (theseNodes[node].getLabel() != -1) {
+                                       out << theseNodes[node].getLabel();
+                               }
+                       }else if (mode == "both") {
+                               out << theseNodes[node].getName();
+                               if (theseNodes[node].getLabel() != -1) {
+                                       out << theseNodes[node].getLabel();
+                               }
+                               //if there is a branch length then print it
+                               if (theseNodes[node].getBranchLength() != -1) {
+                                       out << ":" << theseNodes[node].getBranchLength();
+                               }
+                       }
+               }
+               
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "printBranch");
+               exit(1);
+       }
+}
  /*****************************************************************/
  
  void Tree::printTree() {
diff --git a/tree.h b/tree.h

index 102d56546c24d427616b0388fcf80b869312b694..0b316c3042f01cbb000a3aed975a778f8af5f9fc 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -68,6 +68,7 @@ private:
                                                         //only takes names from the first tree in the tree file and assumes that all trees use the same names.
         int readTreeString(ifstream&);
         int populateNewTree(vector<Node>&, int, int&);
+       void printBranch(int, ostream&, string, vector<Node>&);
                 
         MothurOut* m;
author	westcott <westcott>
	Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)
committer	westcott <westcott>
	Fri, 3 Dec 2010 19:53:27 +0000 (19:53 +0000)
catchallcommand.cpp		patch \| blob \| history
catchallcommand.h		patch \| blob \| history
getoturepcommand.cpp		patch \| blob \| history
getoturepcommand.h		patch \| blob \| history
indicatorcommand.cpp		patch \| blob \| history
indicatorcommand.h		patch \| blob \| history
mothur		patch \| blob \| history
summarycommand.cpp		patch \| blob \| history
tree.cpp		patch \| blob \| history
tree.h		patch \| blob \| history