]> git.donarmstrong.com Git - mothur.git/commitdiff
fixed phylo.diversity and made trim.seqs with allfiles=T open one file at a time.
authorwestcott <westcott>
Tue, 19 Oct 2010 11:28:25 +0000 (11:28 +0000)
committerwestcott <westcott>
Tue, 19 Oct 2010 11:28:25 +0000 (11:28 +0000)
mothur
phylodiversitycommand.cpp
phylodiversitycommand.h
pipelinepdscommand.cpp
trimseqscommand.cpp

diff --git a/mothur b/mothur
index 741ad9ba0bad11c3d0ffe94313830d9218eab6f4..4499d7e5174e5842d39e0eab63fea42495397922 100755 (executable)
Binary files a/mothur and b/mothur differ
index 6198f139ccb117b157dea30fb31a5ef2330e1aae..1c90eca1180acc6287b0530e9c9d3e84fbea120d 100644 (file)
@@ -388,7 +388,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, ma
                                        if (m->control_pressed) { return 0; }
                                        
                                        //calc branch length of randomLeaf k
-                                       float br = calcBranchLength(t, randomLeaf[k], countedBranch);
+                                       vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch);
                        
                                        //for each group in the groups update the total branch length accounting for the names file
                                        vector<string> groups = t->tree[randomLeaf[k]].getGroup();
@@ -401,7 +401,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector<float> >& div, ma
                                                        numSeqsInGroupJ = it->second;
                                                }
                                                
-                                               if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br;  }
+                                               if (numSeqsInGroupJ != 0) {     div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j];  }
                                                
                                                for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
                                                        div[groups[j]][s] = div[groups[j]][s-1];  //update counts, but don't add in redundant branch lengths
@@ -498,37 +498,92 @@ void PhyloDiversityCommand::printData(set<int>& num, map< string, vector<float>
        }
 }
 //**********************************************************************************************************************
-float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
+//need a vector of floats one branch length for every group the node represents.
+vector<float> PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set<int> >& counted){
        try {
 
                //calc the branch length
                //while you aren't at root
-               float sum = 0.0;
+               vector<float> sums; 
                int index = leaf;
                
                vector<string> groups = t->tree[leaf].getGroup();
-                                       
-               while(t->tree[index].getParent() != -1){
-                       
-                       //if you have a BL
-                       if(t->tree[index].getBranchLength() != -1){
-                               if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
-                                       sum += abs(t->tree[index].getBranchLength());
-                                       for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
-                               }
+               sums.resize(groups.size(), 0.0);
+               
+               map<string, map<int, double> > tempTotals; //maps node to total Branch Length
+               map< string, set<int> > tempCounted;
+               set<int>::iterator it;
+       
+               //you are a leaf
+               if(t->tree[index].getBranchLength() != -1){     
+                       for (int k = 0; k < groups.size(); k++) { 
+                               sums[k] += abs(t->tree[index].getBranchLength());       
+                               counted[groups[k]].insert(index);
                        }
-                       index = t->tree[index].getParent();
                }
+               
+               for (int k = 0; k < groups.size(); k++) { 
+                       tempTotals[groups[k]][index] = 0.0;     
+               }
+               
+               index = t->tree[index].getParent();     
                        
-               //get last breanch length added
-               if(t->tree[index].getBranchLength() != -1){
-                       if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch
-                               sum += abs(t->tree[index].getBranchLength());
-                               for (int j = 0; j < groups.size(); j++) {  counted[groups[j]].insert(index);  }
+               //while you aren't at root
+               while(t->tree[index].getParent() != -1){
+
+                       if (m->control_pressed) {  return sums; }
+                       
+                       int pcountSize = 0;     
+                       for (int k = 0; k < groups.size(); k++) {
+                               map<string, int>::iterator itGroup = t->tree[index].pcount.find(groups[k]);
+                               if (itGroup != t->tree[index].pcount.end()) { pcountSize++;  } 
+                       
+                               //do both your chidren have have descendants from the users groups? 
+                               int lc = t->tree[index].getLChild();
+                               int rc = t->tree[index].getRChild();
+                       
+                               int LpcountSize = 0;
+                               itGroup = t->tree[lc].pcount.find(groups[k]);
+                               if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++;  } 
+                                                       
+                               int RpcountSize = 0;
+                               itGroup = t->tree[rc].pcount.find(groups[k]);
+                               if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++;  } 
+                                                               
+                               //if yes, add your childrens tempTotals
+                               if ((LpcountSize != 0) && (RpcountSize != 0)) {
+                                       sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
+                                       
+                                       for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); }
+
+                                       //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl;
+                                       if (t->tree[index].getBranchLength() != -1) {
+                                               if (counted[groups[k]].count(index) == 0) {
+                                                       tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength());
+                                                       tempCounted[groups[k]].insert(index);
+                                               }else{
+                                                       tempTotals[groups[k]][index] = 0.0;
+                                               }
+                                       }else {
+                                               tempTotals[groups[k]][index] = 0.0;
+                                       }
+                               }else { //if no, your tempTotal is your childrens temp totals + your branch length
+                                       tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; 
+                                                                       
+                                       if (counted[groups[k]].count(index) == 0) {
+                                               tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength());
+                                               tempCounted[groups[k]].insert(index);
+                                       }
+
+                               }
+                               //cout << "temptotal = "<< tempTotals[i] << endl;
                        }
+                       
+                       index = t->tree[index].getParent();     
                }
-               
-               return sum;
+
+               return sums;
+
        }
        catch(exception& e) {
                m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength");
index f6c3bbe1d06495717310a464d423c884fb1ac717..f5e205c4e40ce010239c0cc8beba6798e1a17aa4 100644 (file)
@@ -39,7 +39,7 @@ class PhyloDiversityCommand : public Command {
                
                void printData(set<int>&, map< string, vector<float> >&, ofstream&, int);
                void printSumData(map< string, vector<float> >&, ofstream&, int);
-               float calcBranchLength(Tree*, int, map< string, set<int> >&);
+               vector<float> calcBranchLength(Tree*, int, map< string, set<int> >&);
                int driver(Tree*, map< string, vector<float> >&, map<string, vector<float> >&, int, int, vector<int>&, set<int>&, ofstream&, ofstream&, bool);
                int createProcesses(vector<int>&, Tree*, map< string, vector<float> >&, map<string, vector<float> >&, int, int, vector<int>&, set<int>&, ofstream&, ofstream&);
 
index 26969d81c7b6d95a0f7c603489be1da984ea7d99..a747aa752389ccb3dd0339c0b33df5d1c8bd37ef 100644 (file)
@@ -590,38 +590,38 @@ void PipelineCommand::createPatsPipeline(){
                
                //sff.info command
                string thisCommand = "sffinfo(sff=" + sffFile + ")";
-               //commands.push_back(thisCommand);
+               commands.push_back(thisCommand);
                
                //trim.seqs command
                string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta";
                string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual";
-               //thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=F, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
-               //commands.push_back(thisCommand);
+               thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")";
+               commands.push_back(thisCommand);
                
                //unique.seqs
                string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups";
                qualFile =  m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual";
                fastaFile =  m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta";
-               //thisCommand = "unique.seqs(fasta=" + fastaFile + ")"; 
-               //commands.push_back(thisCommand);
+               thisCommand = "unique.seqs(fasta=" + fastaFile + ")"; 
+               commands.push_back(thisCommand);
                
                //align.seqs
                string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names";
                fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile);
-               //thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
-               //commands.push_back(thisCommand);
+               thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")";
+               commands.push_back(thisCommand);
                
                //screen.seqs
                fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align";
-               //thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)";
-       //      commands.push_back(thisCommand);
+               thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)";
+               commands.push_back(thisCommand);
                
                //chimera.slayer
                fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile);
                nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile);
                groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile);
-               //thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
-       //      commands.push_back(thisCommand);
+               thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")";
+               commands.push_back(thisCommand);
                
                //remove.seqs
                string accnosFile = m->getRootName(m->getSimpleName(fastaFile))  + "slayer.accnos";
index 1f8dc579a986350d51d88f3be2e84cd9617f90d7..56fd8c54b53aaff9f4e43e8e694fc71f6987fd7c 100644 (file)
@@ -481,22 +481,40 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                }
                
                ofstream outGroups;
-               vector<ofstream*> fastaFileNames;
-               vector<ofstream*> qualFileNames;
+               //vector<ofstream*> fastaFileNames;
+               //vector<ofstream*> qualFileNames;
                
                if (oligoFile != "") {          
                        m->openOutputFile(groupFile, outGroups);   
                        for (int i = 0; i < fastaNames.size(); i++) {
 
                        #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
-                               fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                               fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp");
+                               //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                               //clear old file if it exists
+                               ofstream temp;
+                               m->openOutputFile(fastaNames[i], temp);
+                               temp.close();
                                if(qFileName != ""){
-                                       qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                                       qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp");
+                                       //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); 
+                                       //clear old file if it exists
+                                       ofstream temp2;
+                                       m->openOutputFile(qualNames[i], temp2);
+                                       temp2.close();
                                }
                        #else
-                               fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate));                      
+                               //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); 
+                               fastaNames[i] = (fastaNames[i] + toString(i) + ".temp");
+                               ofstream temp;
+                               m->openOutputFile(fastaNames[i], temp);
+                               temp.close();                   
                                if(qFileName != ""){
-                                       qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));                        
+                                       //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate));      
+                                       qualNames[i] = (qualNames[i] + toString(i) + ".temp");
+                                       ofstream temp2;
+                                       m->openOutputFile(qualNames[i], temp2);
+                                       temp2.close();          
                                }
                        #endif
                        }
@@ -518,11 +536,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                inFASTA.close(); outFASTA.close(); scrapFASTA.close();
                                if (oligoFile != "") {   outGroups.close();   }
                                
-                               for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }     
+                               //for(int i=0;i<fastaFileNames.size();i++){  fastaFileNames[i]->close(); delete fastaFileNames[i];  }   
 
                                if(qFileName != ""){
                                        qFile.close();
-                                       for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }        
+                                       //for(int i=0;i<qualFileNames.size();i++){  qualFileNames[i]->close(); delete qualFileNames[i];  }      
                                }
                                for (int i = 0; i < outputNames.size(); i++) {  remove(outputNames[i].c_str()); }
 
@@ -609,10 +627,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                                                }
                                                outGroups << currSeq.getName() << '\t' << thisGroup << endl;
                                                if(allFiles){
-                                                       currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+                                                       ofstream outTemp;
+                                                       m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp);
+                                                       //currSeq.printSequence(*fastaFileNames[indexToFastaFile]);
+                                                       currSeq.printSequence(outTemp);
+                                                       outTemp.close();
                                                        
                                                        if(qFileName != ""){
-                                                               currQual.printQScores(*qualFileNames[indexToFastaFile]);                                                        
+                                                               //currQual.printQScores(*qualFileNames[indexToFastaFile]);
+                                                               ofstream outTemp2;
+                                                               m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2);
+                                                               currQual.printQScores(outTemp2);
+                                                               outTemp2.close();                                                       
                                                        }
                                                }
                                        }
@@ -648,17 +674,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string
                if (oligoFile != "") {   outGroups.close();   }
                if(qFileName != "")     {       qFile.close();  scrapQual.close(); outQual.close();     }
                
-               for(int i=0;i<fastaFileNames.size();i++){
-                       fastaFileNames[i]->close();
-                       delete fastaFileNames[i];
-               }               
-               
-               if(qFileName != ""){
-                       for(int i=0;i<qualFileNames.size();i++){
-                               qualFileNames[i]->close();
-                               delete qualFileNames[i];
-                       }               
-               }                       
+               //for(int i=0;i<fastaFileNames.size();i++){
+               //      fastaFileNames[i]->close();
+               //      delete fastaFileNames[i];
+               //}             
+               
+               //if(qFileName != ""){
+                       //for(int i=0;i<qualFileNames.size();i++){
+                               //qualFileNames[i]->close();
+                               //delete qualFileNames[i];
+                       //}             
+               //}                     
                
                return count;
        }