]> git.donarmstrong.com Git - mothur.git/commitdiff
sped up unifrac weighted.
authorwestcott <westcott>
Mon, 30 Aug 2010 13:49:18 +0000 (13:49 +0000)
committerwestcott <westcott>
Mon, 30 Aug 2010 13:49:18 +0000 (13:49 +0000)
readtree.cpp
tree.cpp
tree.h
trimseqscommand.cpp
unifracunweightedcommand.cpp
unifracweightedcommand.cpp
weighted.cpp
weighted.h

index f72d69821f56d8fa8fb90d098f71ca855bb08126..e98524b45e9ec513fe171c5a342ab381a68fcacc 100644 (file)
@@ -341,7 +341,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T) {
                        if(f.peek() == ':'){                                                                          
                                readSpecialChar(f,':',"colon"); 
                                                                                
-                               if(n >= numNodes){      m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
+                               if(n >= numNodes){ m->mothurOut("Error: Too many nodes in input tree\n");  readOk = -1; return -1; }
                                
                                T->tree[n].setBranchLength(readBranchLength(f));
                        }else{
index 3cb26113f8deb8acf3bf28f7040becf2bac7385f..f8bb76fd664383f148c73eca62aa5cc2291202c8 100644 (file)
--- a/tree.cpp
+++ b/tree.cpp
@@ -34,17 +34,28 @@ Tree::Tree() {
                numNodes = 2*numLeaves - 1;
                
                tree.resize(numNodes);
+               
+               //initialize groupNodeInfo
+               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               }
 
                //initialize tree with correct number of nodes, name and group info.
                for (int i = 0; i < numNodes; i++) {
                        //initialize leaf nodes
                        if (i <= (numLeaves-1)) {
                                tree[i].setName(globaldata->Treenames[i]);
-                               vector<string> tempGroups; tempGroups.push_back(globaldata->gTreemap->getGroup(globaldata->Treenames[i]));
+                               
+                               //save group info
+                               string group = globaldata->gTreemap->getGroup(globaldata->Treenames[i]);
+                               vector<string> tempGroups; tempGroups.push_back(group);
                                tree[i].setGroup(tempGroups);
+                               groupNodeInfo[group].push_back(i); 
+                               
                                //set pcount and pGroup for groupname to 1.
-                               tree[i].pcount[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
-                               tree[i].pGroups[globaldata->gTreemap->getGroup(globaldata->Treenames[i])] = 1;
+                               tree[i].pcount[group] = 1;
+                               tree[i].pGroups[group] = 1;
+                               
                                //Treemap knows name, group and index to speed up search
                                globaldata->gTreemap->setIndex(globaldata->Treenames[i], i);
        
@@ -97,10 +108,14 @@ void Tree::addNamesToCounts() {
                                
                                map<string, int>::iterator itCounts;
                                int maxPars = 1;
+                               set<string> groupsAddedForThisNode;
                                for (int j = 0; j < dupNames.size(); j++) {
-                               
+                                       
+                                       string group = globaldata->gTreemap->getGroup(dupNames[j]);
+                                       
                                        if (dupNames[j] != name) {//you already added yourself in the constructor
-                                               string group = globaldata->gTreemap->getGroup(dupNames[j]);
+                               
+                                               if (groupsAddedForThisNode.count(group) == 0)  {  groupNodeInfo[group].push_back(i);  groupsAddedForThisNode.insert(group);  } //if you have not already added this node for this group, then add it
                                                
                                                //update pcounts
                                                itCounts = tree[i].pcount.find(group);
@@ -122,7 +137,7 @@ void Tree::addNamesToCounts() {
                                                if(tree[i].pGroups[group] > maxPars){
                                                        maxPars = tree[i].pGroups[group];
                                                }
-                                       }//end if
+                                       }else {  groupsAddedForThisNode.insert(group);  } //add it so you don't add it to groupNodeInfo again
                                }//end for
                                
                                if (maxPars > 1) { //then we have some more dominant groups
@@ -208,6 +223,27 @@ int Tree::assembleTree() {
                exit(1);
        }
 }
+/*****************************************************************/
+int Tree::assembleTree(string n) {
+       try {
+               
+               //build the pGroups in non leaf nodes to be used in the parsimony calcs.
+               for (int i = numLeaves; i < numNodes; i++) {
+                       if (m->control_pressed) { return 1; }
+
+                       tree[i].pGroups = (mergeGroups(i));
+                       tree[i].pcount = (mergeGcounts(i));
+               }
+               //float B = clock();
+               //cout << "assembleTree\t" << (B-A) / CLOCKS_PER_SEC << endl;
+               return 0;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Tree", "assembleTree");
+               exit(1);
+       }
+}
+
 /*****************************************************************/
 void Tree::getCopy(Tree* copy) {
        try {
@@ -240,6 +276,8 @@ void Tree::getCopy(Tree* copy) {
                        tree[i].pcount = copy->tree[i].pcount;
                }
                
+               groupNodeInfo = copy->groupNodeInfo;
+               
        }
        catch(exception& e) {
                m->errorOut(e, "Tree", "getCopy");
@@ -392,6 +430,11 @@ map<string,int> Tree::mergeGcounts(int position) {
 
 void Tree::randomLabels(vector<string> g) {
        try {
+       
+               //initialize groupNodeInfo
+               for (int i = 0; i < globaldata->gTreemap->namesOfGroups.size(); i++) {
+                       groupNodeInfo[globaldata->gTreemap->namesOfGroups[i]].resize(0);
+               }
                
                for(int i = 0; i < numLeaves; i++){
                        int z;
@@ -423,6 +466,9 @@ void Tree::randomLabels(vector<string> g) {
                                tree[z].pcount = (tree[i].pcount);
                                tree[i].pcount = (gcount_hold);
                        }
+                       
+                       for (int k = 0; k < (tree[i].getGroup()).size(); k++) {  groupNodeInfo[(tree[i].getGroup())[k]].push_back(i); }
+                       for (int k = 0; k < (tree[z].getGroup()).size(); k++) {  groupNodeInfo[(tree[z].getGroup())[k]].push_back(z); }
                }
        }
        catch(exception& e) {
@@ -479,14 +525,14 @@ void Tree::randomBlengths()  {
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(vector<string> g) {
        randomLabels(g);
-       assembleTree();
+       assembleTree("noNameCounts");
 }
 /*************************************************************************************************/
 void Tree::assembleRandomUnifracTree(string groupA, string groupB) {
 
        vector<string> temp; temp.push_back(groupA); temp.push_back(groupB);
        randomLabels(temp);
-       assembleTree();
+       assembleTree("noNameCounts");
 }
 
 /*************************************************************************************************/
diff --git a/tree.h b/tree.h
index 59343d5fa828a2bcd15d4402764c070c51111e01..1c44d8c919805c24072e0f6ab314789393ed63ce 100644 (file)
--- a/tree.h
+++ b/tree.h
@@ -39,9 +39,10 @@ public:
        
        //this function takes the leaf info and populates the non leaf nodes
        int assembleTree();     
+       int assembleTree(string);       
        
        vector<Node> tree;              //the first n nodes are the leaves, where n is the number of sequences.
-       
+       map< string, vector<int> > groupNodeInfo;       //maps group to indexes of leaf nodes with that group, different groups may contain same node because of names file.
                        
 private:
        GlobalData* globaldata;
index 8a767f696415737284491fa691a708dd1c011573..3c1367a17eadd176edf05078c46b4e4a398d4640 100644 (file)
@@ -429,7 +429,7 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                vector<ofstream*> fastaFileNames;
                vector<ofstream*> qualFileNames;
                
-               
+       cout << "here" << endl; 
                if (oligoFile != "") {          
                        m->openOutputFile(groupFile, outGroups);   
                        for (int i = 0; i < fastaNames.size(); i++) {
@@ -446,17 +446,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                        #endif
                        }
                }
-               
+cout << "here " << filename << endl;           
                ifstream inFASTA;
                m->openInputFile(filename, inFASTA);
                inFASTA.seekg(line->start);
-               
+       cout << "here " << qFileName << endl;   
                ifstream qFile;
                if(qFileName != "")     {       m->openInputFile(qFileName, qFile);     qFile.seekg(qline->start);  }
                
                bool done = false;
                int count = 0;
-       
+       cout << "here" << endl;
                while (!done) {
                                
                        if (m->control_pressed) { 
index 848c85c9d20f14f2ede20987026666ab2cd62181..5fd26df8ccdd6e5510856ca26bd7574caa11ed43 100644 (file)
@@ -125,6 +125,8 @@ int UnifracUnweightedCommand::execute() {
                
                if (abort == true) { return 0; }
                
+               int start = time(NULL);
+               
                userData.resize(numComp,0);  //data[0] = unweightedscore 
                randomData.resize(numComp,0); //data[0] = unweightedscore
                //create new tree with same num nodes and leaves as users
@@ -239,6 +241,8 @@ int UnifracUnweightedCommand::execute() {
                
                if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) {        remove(outputNames[i].c_str());  }      return 0; }
                
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.unweighted."); m->mothurOutEndLine();
+               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
index 28d94d0adf502d21860099ba2928081169715e01..9cda8f2ec14c4accb670c174602fb5c9ab2d5502 100644 (file)
@@ -116,6 +116,8 @@ int UnifracWeightedCommand::execute() {
        
                if (abort == true) { return 0; }
                
+               int start = time(NULL);
+               
                Progress* reading;
                if (random) {   reading = new Progress("Comparing to random:", iters);  }
                
@@ -264,6 +266,8 @@ int UnifracWeightedCommand::execute() {
                        return 0; 
                }
                
+               m->mothurOut("It took " + toString(time(NULL) - start) + " secs to run unifrac.weighted."); m->mothurOutEndLine();
+               
                m->mothurOutEndLine();
                m->mothurOut("Output File Names: "); m->mothurOutEndLine();
                for (int i = 0; i < outputNames.size(); i++) {  m->mothurOut(outputNames[i]); m->mothurOutEndLine();    }
index faa1ff76697e88a4f7e0bb06c1e2e44df7c53a2d..c8b64e20446d363af2a6413964a25a1ad7b88dd4 100644 (file)
@@ -18,6 +18,10 @@ EstOutput Weighted::getValues(Tree* t) {
                vector<double> D;
                
                numGroups = globaldata->Groups.size();
+       
+               vector<double> sums = getBranchLengthSums(t);
+               
+               if (m->control_pressed) { return data; }
                
                //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3;
                int count = 0;
@@ -29,31 +33,33 @@ EstOutput Weighted::getValues(Tree* t) {
                                vector<string> groups; groups.push_back(globaldata->Groups[i]); groups.push_back(globaldata->Groups[l]);
                                
                                D.push_back(0.0000); //initialize a spot in D for each combination
+                       
+                               //adding the wieghted sums from group i
+                               for (int j = 0; j < t->groupNodeInfo[groups[0]].size(); j++) { //the leaf nodes that have seqs from group i
+                                       map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[0]][j]].pcount.find(groups[0]);
+                                       int numSeqsInGroupI = it->second;
+                                       
+                                       double weightedSum = ((numSeqsInGroupI * sums[t->groupNodeInfo[groups[0]][j]]) / (double)tmap->seqsPerGroup[groups[0]]);
                                
-                               /********************************************************/
+                                       D[count] += weightedSum;
+                               }
+                               
+                               //adding the wieghted sums from group l
+                               for (int j = 0; j < t->groupNodeInfo[groups[1]].size(); j++) { //the leaf nodes that have seqs from group l
+                                       map<string, int>::iterator it = t->tree[t->groupNodeInfo[groups[1]][j]].pcount.find(groups[1]);
+                                       int numSeqsInGroupL = it->second;
+                                       
+                                       double weightedSum = ((numSeqsInGroupL * sums[t->groupNodeInfo[groups[1]][j]]) / (double)tmap->seqsPerGroup[groups[1]]);
+                               
+                                       D[count] += weightedSum;
+                               }
+
+                               /********************************************************
                                //calculate a D value for each group combo
                                for(int v=0;v<t->getNumLeaves();v++){
                                
                                        if (m->control_pressed) { return data; }
                                        
-                                       int index = v;
-                                       double sum = 0.0000;
-               
-                                       //while you aren't at root
-                                       while(t->tree[index].getParent() != -1){
-                                                       
-                                               //if you have a BL
-                                               if(t->tree[index].getBranchLength() != -1){
-                                                       sum += abs(t->tree[index].getBranchLength());
-                                               }
-                                               index = t->tree[index].getParent();
-                                       }
-                                               
-                                       //get last breanch length added
-                                       if(t->tree[index].getBranchLength() != -1){
-                                               sum += abs(t->tree[index].getBranchLength());
-                                       }
-                                               
                                        //is this sum from a sequence which is in one of the users groups
                                        if (m->inUsersGroups(t->tree[v].getGroup(), globaldata->Groups) == true) {
                                                //is this sum from a sequence which is in this groupCombo
@@ -145,13 +151,38 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                
                data.clear(); //clear out old values
                
+               vector<double> sums = getBranchLengthSums(t);
+               
+               if (m->control_pressed) { return data; }
+               
                //initialize weighted score
                WScore[(groupA+groupB)] = 0.0;
-               float D = 0.0;
+               double D = 0.0;
                
                vector<string> groups; groups.push_back(groupA); groups.push_back(groupB);
-                                               
-               /********************************************************/
+               
+               //adding the wieghted sums from groupA
+               for (int j = 0; j < t->groupNodeInfo[groupA].size(); j++) { //the leaf nodes that have seqs from group i
+                       map<string, int>::iterator it = t->tree[j].pcount.find(groupA);
+                       int numSeqsInGroupA = it->second;
+                       
+                       double weightedSum = ((numSeqsInGroupA * sums[j]) / (double)tmap->seqsPerGroup[groupA]);
+               
+                       D += weightedSum;
+               }
+               
+               //adding the wieghted sums from groupB
+               for (int j = 0; j < t->groupNodeInfo[groupB].size(); j++) { //the leaf nodes that have seqs from group l
+                       map<string, int>::iterator it = t->tree[j].pcount.find(groupB);
+                       int numSeqsInGroupB = it->second;
+                       
+                       double weightedSum = ((numSeqsInGroupB * sums[j]) / (double)tmap->seqsPerGroup[groupB]);
+               
+                       D += weightedSum;
+               }
+                               
+                                                                                                               
+               /********************************************************
                //calculate a D value for the group combo
                for(int v=0;v<t->getNumLeaves();v++){
                        if (m->control_pressed) { return data; }
@@ -240,12 +271,45 @@ EstOutput Weighted::getValues(Tree* t, string groupA, string groupB) {
                exit(1);
        }
 }
-
-
-
-
-
-
+/**************************************************************************************************/
+vector<double> Weighted::getBranchLengthSums(Tree* t) { 
+       try {
+               
+               vector<double> sums;
+               
+               for(int v=0;v<t->getNumLeaves();v++){
+                       
+                       if (m->control_pressed) { return sums; }
+                               
+                       int index = v;
+                       double sum = 0.0000;
+       
+                       //while you aren't at root
+                       while(t->tree[index].getParent() != -1){
+                                               
+                               //if you have a BL
+                               if(t->tree[index].getBranchLength() != -1){
+                                       sum += abs(t->tree[index].getBranchLength());
+                               }
+                               index = t->tree[index].getParent();
+                       }
+                                       
+                       //get last breanch length added
+                       if(t->tree[index].getBranchLength() != -1){
+                               sum += abs(t->tree[index].getBranchLength());
+                       }
+                       
+                       sums.push_back(sum);
+               }
+               
+               return sums;
+       }
+       catch(exception& e) {
+               m->errorOut(e, "Weighted", "getBranchLengthSums");
+               exit(1);
+       }
+}
+/**************************************************************************************************/
 
 
 
index 3914ef229112d8cf4bd5b55033de0f459bbef25a..fedfa0ce8d372420baba40964010fd46004fb716 100644 (file)
@@ -30,6 +30,8 @@ class Weighted : public TreeCalculator  {
                TreeMap* tmap;
                map<string, int>::iterator it;
                map<string, double> WScore; //a score for each group combination i.e. AB, AC, BC.
+               
+               vector<double> getBranchLengthSums(Tree*);
 };
 
 /***********************************************************************/