From: westcott Date: Tue, 19 Oct 2010 11:28:25 +0000 (+0000) Subject: fixed phylo.diversity and made trim.seqs with allfiles=T open one file at a time. X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=83c530c577250f2bc4c3443b33d97449abf40f36 fixed phylo.diversity and made trim.seqs with allfiles=T open one file at a time. --- diff --git a/mothur b/mothur index 741ad9b..4499d7e 100755 Binary files a/mothur and b/mothur differ diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index 6198f13..1c90eca 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -388,7 +388,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma if (m->control_pressed) { return 0; } //calc branch length of randomLeaf k - float br = calcBranchLength(t, randomLeaf[k], countedBranch); + vector br = calcBranchLength(t, randomLeaf[k], countedBranch); //for each group in the groups update the total branch length accounting for the names file vector groups = t->tree[randomLeaf[k]].getGroup(); @@ -401,7 +401,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma numSeqsInGroupJ = it->second; } - if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br; } + if (numSeqsInGroupJ != 0) { div[groups[j]][(counts[groups[j]]+1)] = div[groups[j]][counts[groups[j]]] + br[j]; } for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) { div[groups[j]][s] = div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths @@ -498,37 +498,92 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector } } //********************************************************************************************************************** -float PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ +//need a vector of floats one branch length for every group the node represents. +vector PhyloDiversityCommand::calcBranchLength(Tree* t, int leaf, map< string, set >& counted){ try { //calc the branch length //while you aren't at root - float sum = 0.0; + vector sums; int index = leaf; vector groups = t->tree[leaf].getGroup(); - - while(t->tree[index].getParent() != -1){ - - //if you have a BL - if(t->tree[index].getBranchLength() != -1){ - if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch - sum += abs(t->tree[index].getBranchLength()); - for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); } - } + sums.resize(groups.size(), 0.0); + + map > tempTotals; //maps node to total Branch Length + map< string, set > tempCounted; + set::iterator it; + + //you are a leaf + if(t->tree[index].getBranchLength() != -1){ + for (int k = 0; k < groups.size(); k++) { + sums[k] += abs(t->tree[index].getBranchLength()); + counted[groups[k]].insert(index); } - index = t->tree[index].getParent(); } + + for (int k = 0; k < groups.size(); k++) { + tempTotals[groups[k]][index] = 0.0; + } + + index = t->tree[index].getParent(); - //get last breanch length added - if(t->tree[index].getBranchLength() != -1){ - if (counted[groups[0]].count(index) == 0) { //you have not already counted this branch - sum += abs(t->tree[index].getBranchLength()); - for (int j = 0; j < groups.size(); j++) { counted[groups[j]].insert(index); } + //while you aren't at root + while(t->tree[index].getParent() != -1){ + + if (m->control_pressed) { return sums; } + + int pcountSize = 0; + for (int k = 0; k < groups.size(); k++) { + map::iterator itGroup = t->tree[index].pcount.find(groups[k]); + if (itGroup != t->tree[index].pcount.end()) { pcountSize++; } + + //do both your chidren have have descendants from the users groups? + int lc = t->tree[index].getLChild(); + int rc = t->tree[index].getRChild(); + + int LpcountSize = 0; + itGroup = t->tree[lc].pcount.find(groups[k]); + if (itGroup != t->tree[lc].pcount.end()) { LpcountSize++; } + + int RpcountSize = 0; + itGroup = t->tree[rc].pcount.find(groups[k]); + if (itGroup != t->tree[rc].pcount.end()) { RpcountSize++; } + + //if yes, add your childrens tempTotals + if ((LpcountSize != 0) && (RpcountSize != 0)) { + sums[k] += tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; + + for (it = tempCounted[groups[k]].begin(); it != tempCounted[groups[k]].end(); it++) { counted[groups[k]].insert(*it); } + + //cout << "added to total " << tempTotals[lc] << '\t' << tempTotals[rc] << endl; + if (t->tree[index].getBranchLength() != -1) { + if (counted[groups[k]].count(index) == 0) { + tempTotals[groups[k]][index] = abs(t->tree[index].getBranchLength()); + tempCounted[groups[k]].insert(index); + }else{ + tempTotals[groups[k]][index] = 0.0; + } + }else { + tempTotals[groups[k]][index] = 0.0; + } + }else { //if no, your tempTotal is your childrens temp totals + your branch length + tempTotals[groups[k]][index] = tempTotals[groups[k]][lc] + tempTotals[groups[k]][rc]; + + if (counted[groups[k]].count(index) == 0) { + tempTotals[groups[k]][index] += abs(t->tree[index].getBranchLength()); + tempCounted[groups[k]].insert(index); + } + + } + //cout << "temptotal = "<< tempTotals[i] << endl; } + + index = t->tree[index].getParent(); } - - return sum; + + return sums; + } catch(exception& e) { m->errorOut(e, "PhyloDiversityCommand", "calcBranchLength"); diff --git a/phylodiversitycommand.h b/phylodiversitycommand.h index f6c3bbe..f5e205c 100644 --- a/phylodiversitycommand.h +++ b/phylodiversitycommand.h @@ -39,7 +39,7 @@ class PhyloDiversityCommand : public Command { void printData(set&, map< string, vector >&, ofstream&, int); void printSumData(map< string, vector >&, ofstream&, int); - float calcBranchLength(Tree*, int, map< string, set >&); + vector calcBranchLength(Tree*, int, map< string, set >&); int driver(Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&, bool); int createProcesses(vector&, Tree*, map< string, vector >&, map >&, int, int, vector&, set&, ofstream&, ofstream&); diff --git a/pipelinepdscommand.cpp b/pipelinepdscommand.cpp index 26969d8..a747aa7 100644 --- a/pipelinepdscommand.cpp +++ b/pipelinepdscommand.cpp @@ -590,38 +590,38 @@ void PipelineCommand::createPatsPipeline(){ //sff.info command string thisCommand = "sffinfo(sff=" + sffFile + ")"; - //commands.push_back(thisCommand); + commands.push_back(thisCommand); //trim.seqs command string fastaFile = m->getRootName(m->getSimpleName(sffFile)) + "fasta"; string qualFile = m->getRootName(m->getSimpleName(sffFile)) + "qual"; - //thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=F, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")"; - //commands.push_back(thisCommand); + thisCommand = "trim.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", allfiles=T, maxambig=0, maxhomop=8, flip=T, bdiffs=1, pdiffs=2, qwindowaverage=35, qwindowsize=50, oligos=" + oligosFile + ", qfile=" + qualFile + ")"; + commands.push_back(thisCommand); //unique.seqs string groupFile = m->getRootName(m->getSimpleName(fastaFile)) + "groups"; qualFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.qual"; fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "trim.fasta"; - //thisCommand = "unique.seqs(fasta=" + fastaFile + ")"; - //commands.push_back(thisCommand); + thisCommand = "unique.seqs(fasta=" + fastaFile + ")"; + commands.push_back(thisCommand); //align.seqs string nameFile = m->getRootName(m->getSimpleName(fastaFile)) + "names"; fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "unique" + m->getExtension(fastaFile); - //thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")"; - //commands.push_back(thisCommand); + thisCommand = "align.seqs(processors=" + toString(processors) + ", candidate=" + fastaFile + ", template=" + alignFile + ")"; + commands.push_back(thisCommand); //screen.seqs fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "align"; - //thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)"; - // commands.push_back(thisCommand); + thisCommand = "screen.seqs(processors=" + toString(processors) + ", fasta=" + fastaFile + ", name=" + nameFile + ", group=" + groupFile + ", optimize=end-minlength)"; + commands.push_back(thisCommand); //chimera.slayer fastaFile = m->getRootName(m->getSimpleName(fastaFile)) + "good" + m->getExtension(fastaFile); nameFile = m->getRootName(m->getSimpleName(nameFile)) + "good" + m->getExtension(nameFile); groupFile = m->getRootName(m->getSimpleName(groupFile)) + "good" + m->getExtension(groupFile); - //thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")"; - // commands.push_back(thisCommand); + thisCommand = "chimera.slayer(processors=" + toString(processors) + ", fasta=" + fastaFile + ", template=" + chimeraFile + ")"; + commands.push_back(thisCommand); //remove.seqs string accnosFile = m->getRootName(m->getSimpleName(fastaFile)) + "slayer.accnos"; diff --git a/trimseqscommand.cpp b/trimseqscommand.cpp index 1f8dc57..56fd8c5 100644 --- a/trimseqscommand.cpp +++ b/trimseqscommand.cpp @@ -481,22 +481,40 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } ofstream outGroups; - vector fastaFileNames; - vector qualFileNames; + //vector fastaFileNames; + //vector qualFileNames; if (oligoFile != "") { m->openOutputFile(groupFile, outGroups); for (int i = 0; i < fastaNames.size(); i++) { #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(getpid()) + ".temp"); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); if(qFileName != ""){ - qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + qualNames[i] = (qualNames[i] + toString(getpid()) + ".temp"); + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(getpid()) + ".temp").c_str(), ios::ate)); + //clear old file if it exists + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); } #else - fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + //fastaFileNames.push_back(new ofstream((fastaNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + fastaNames[i] = (fastaNames[i] + toString(i) + ".temp"); + ofstream temp; + m->openOutputFile(fastaNames[i], temp); + temp.close(); if(qFileName != ""){ - qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + //qualFileNames.push_back(new ofstream((qualNames[i] + toString(i) + ".temp").c_str(), ios::ate)); + qualNames[i] = (qualNames[i] + toString(i) + ".temp"); + ofstream temp2; + m->openOutputFile(qualNames[i], temp2); + temp2.close(); } #endif } @@ -518,11 +536,11 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string inFASTA.close(); outFASTA.close(); scrapFASTA.close(); if (oligoFile != "") { outGroups.close(); } - for(int i=0;iclose(); delete fastaFileNames[i]; } + //for(int i=0;iclose(); delete fastaFileNames[i]; } if(qFileName != ""){ qFile.close(); - for(int i=0;iclose(); delete qualFileNames[i]; } + //for(int i=0;iclose(); delete qualFileNames[i]; } } for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } @@ -609,10 +627,18 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string } outGroups << currSeq.getName() << '\t' << thisGroup << endl; if(allFiles){ - currSeq.printSequence(*fastaFileNames[indexToFastaFile]); + ofstream outTemp; + m->openOutputFileAppend(fastaNames[indexToFastaFile], outTemp); + //currSeq.printSequence(*fastaFileNames[indexToFastaFile]); + currSeq.printSequence(outTemp); + outTemp.close(); if(qFileName != ""){ - currQual.printQScores(*qualFileNames[indexToFastaFile]); + //currQual.printQScores(*qualFileNames[indexToFastaFile]); + ofstream outTemp2; + m->openOutputFileAppend(qualNames[indexToFastaFile], outTemp2); + currQual.printQScores(outTemp2); + outTemp2.close(); } } } @@ -648,17 +674,17 @@ int TrimSeqsCommand::driverCreateTrim(string filename, string qFileName, string if (oligoFile != "") { outGroups.close(); } if(qFileName != "") { qFile.close(); scrapQual.close(); outQual.close(); } - for(int i=0;iclose(); - delete fastaFileNames[i]; - } - - if(qFileName != ""){ - for(int i=0;iclose(); - delete qualFileNames[i]; - } - } + //for(int i=0;iclose(); + // delete fastaFileNames[i]; + //} + + //if(qFileName != ""){ + //for(int i=0;iclose(); + //delete qualFileNames[i]; + //} + //} return count; }