From 202846c98b9eff0eca7b20a570bfffa8ee4a5f4e Mon Sep 17 00:00:00 2001 From: westcott Date: Fri, 30 Jul 2010 12:55:01 +0000 Subject: [PATCH] changes for 1.12.2 --- makefile | 2 +- mothur.cpp | 4 +- nast.cpp | 50 +++++++++++++---------- phylotree.cpp | 32 ++++++++------- sffinfocommand.cpp | 2 +- summarycommand.cpp | 94 ++++++++++++++++++++++++++++++++++++++++++- summarycommand.h | 3 +- taxonomyequalizer.cpp | 6 +-- 8 files changed, 146 insertions(+), 47 deletions(-) diff --git a/makefile b/makefile index d751bb6..3f0893c 100644 --- a/makefile +++ b/makefile @@ -13,7 +13,7 @@ CXXFLAGS += -O3 -MOTHUR_FILES = "\"../Release\"" +MOTHUR_FILES = "\"Enter_your_default_path_here\"" ifeq ($(strip $(MOTHUR_FILES)),"\"Enter_your_default_path_here\"") else CXXFLAGS += -DMOTHUR_FILES=${MOTHUR_FILES} diff --git a/mothur.cpp b/mothur.cpp index c413ced..ea3ea3e 100644 --- a/mothur.cpp +++ b/mothur.cpp @@ -98,9 +98,9 @@ int main(int argc, char *argv[]){ #endif //header - m->mothurOut("mothur v.1.12.1"); + m->mothurOut("mothur v.1.12.2"); m->mothurOutEndLine(); - m->mothurOut("Last updated: 7/29/2010"); + m->mothurOut("Last updated: 7/30/2010"); m->mothurOutEndLine(); m->mothurOutEndLine(); m->mothurOut("by"); diff --git a/nast.cpp b/nast.cpp index 9e26b11..8ed69fa 100644 --- a/nast.cpp +++ b/nast.cpp @@ -132,51 +132,56 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl if((i-leftIndex) <= (rightIndex-i)){ // the left gap is closer - > move stuff left there's if(leftRoom >= insertLength){ // enough room to the left to move + //cout << "lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-insertLength+1); - string rightCandidateString = candAln.substr(leftIndex+1); + //cout << "lr candAln = " << candAln.length() << '\t' << leftIndex-insertLength+1 << '\t' << leftIndex+1 << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-insertLength+1)); + string rightCandidateString = candAln.substr((leftIndex+1)); candAln = leftCandidateString + rightCandidateString; } else{ // not enough room to the left, have to steal some space to + + //cout << "in else lr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; string leftTemplateString = newTemplateAlign.substr(0,i); // the right - string rightTemplateString = newTemplateAlign.substr(i+insertLength); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+(insertLength-leftRoom)); + //cout << " in else lr candAln = " << candAln.length() << '\t' << leftIndex-leftRoom+1 << '\t' << rightIndex-leftIndex-1 << '\t' << rightIndex+(insertLength-leftRoom) << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+(insertLength-leftRoom))); candAln = leftCandidateString + insertString + rightCandidateString; } } else{ // the right gap is closer - > move stuff right there's if(rightRoom >= insertLength){ // enough room to the right to move + //cout << "rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - + //cout << "rr candAln = " << candAln.length() << '\t' << i << '\t' << rightIndex << '\t' << rightIndex+insertLength << endl; string leftCandidateString = candAln.substr(0,rightIndex); - string rightCandidateString = candAln.substr(rightIndex+insertLength); + string rightCandidateString = candAln.substr((rightIndex+insertLength)); candAln = leftCandidateString + rightCandidateString; } else{ // not enough room to the right, have to steal some // space to the left lets move left and then right... + //cout << "in else rr newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << i+insertLength << endl; string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+insertLength); + string rightTemplateString = newTemplateAlign.substr((i+insertLength)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - - string leftCandidateString = candAln.substr(0,leftIndex-(insertLength-rightRoom)+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+rightRoom); + //cout << "in else rr candAln = " << candAln.length() << '\t' << '\t' << (leftIndex-(insertLength-rightRoom)+1) << '\t' << (leftIndex+1,rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-(insertLength-rightRoom)+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+rightRoom)); candAln = leftCandidateString + insertString + rightCandidateString; } @@ -186,15 +191,16 @@ void Nast::removeExtraGaps(string& candAln, string tempAln, string newTemplateAl } else{ // there could be a case where there isn't enough room in either direction to move stuff - +//cout << "in else else newTemplateAlign = " << newTemplateAlign.length() << '\t' << i << '\t' << (i+leftRoom+rightRoom) << endl; string leftTemplateString = newTemplateAlign.substr(0,i); - string rightTemplateString = newTemplateAlign.substr(i+leftRoom+rightRoom); + string rightTemplateString = newTemplateAlign.substr((i+leftRoom+rightRoom)); newTemplateAlign = leftTemplateString + rightTemplateString; longAlignmentLength = newTemplateAlign.length(); - string leftCandidateString = candAln.substr(0,leftIndex-leftRoom+1); - string insertString = candAln.substr(leftIndex+1,rightIndex-leftIndex-1); - string rightCandidateString = candAln.substr(rightIndex+rightRoom); + //cout << "in else else newTemplateAlign = " << candAln.length() << '\t' << (leftIndex-leftRoom+1) << '\t' << (leftIndex+1) << '\t' << (rightIndex-leftIndex-1) << '\t' << (rightIndex+rightRoom) << endl; + string leftCandidateString = candAln.substr(0,(leftIndex-leftRoom+1)); + string insertString = candAln.substr((leftIndex+1),(rightIndex-leftIndex-1)); + string rightCandidateString = candAln.substr((rightIndex+rightRoom)); candAln = leftCandidateString + insertString + rightCandidateString; i -= (leftRoom + rightRoom); diff --git a/phylotree.cpp b/phylotree.cpp index d085f6c..6b32e27 100644 --- a/phylotree.cpp +++ b/phylotree.cpp @@ -370,18 +370,18 @@ void PhyloTree::binUnclassified(string file){ map::iterator childPointer; vector copy = tree; - + //fill out tree fillOutTree(0, copy); - - //get leaf nodes that may need externsion + + //get leaf nodes that may need extension for (int i = 0; i < copy.size(); i++) { if (copy[i].children.size() == 0) { leafNodes[i] = i; } } - + //cout << "maxLevel = " << maxLevel << endl; int copyNodes = copy.size(); //go through the seqs and if a sequence finest taxon is not the same level as the most finely defined taxon then classify it as unclassified where necessary @@ -394,7 +394,7 @@ void PhyloTree::binUnclassified(string file){ int currentNode = itLeaf->second; //this sequence is unclassified at some levels - while(level <= maxLevel){ + while(level < maxLevel){ level++; @@ -434,19 +434,21 @@ void PhyloTree::fillOutTree(int index, vector& copy) { try { map::iterator it; - it = copy[index].children.find("unclassified"); - if (it == copy[index].children.end()) { //no unclassified at this level - string taxon = "unclassified"; - copy.push_back(TaxNode(taxon)); - copy[index].children[taxon] = copy.size()-1; - copy[copy.size()-1].parent = index; - copy[copy.size()-1].level = copy[index].level + 1; - } + if (copy[index].level < maxLevel) { + it = copy[index].children.find("unclassified"); + if (it == copy[index].children.end()) { //no unclassified at this level + string taxon = "unclassified"; + copy.push_back(TaxNode(taxon)); + copy[index].children[taxon] = copy.size()-1; + copy[copy.size()-1].parent = index; + copy[copy.size()-1].level = copy[index].level + 1; + } - if (tree[index].level <= maxLevel) { - for(it=tree[index].children.begin();it!=tree[index].children.end();it++){ //check your children + + for(it=copy[index].children.begin();it!=copy[index].children.end();it++){ //check your children fillOutTree(it->second, copy); } + } } diff --git a/sffinfocommand.cpp b/sffinfocommand.cpp index f8ba25b..38322ac 100644 --- a/sffinfocommand.cpp +++ b/sffinfocommand.cpp @@ -405,7 +405,7 @@ int SffInfoCommand::readHeader(ifstream& in, Header& header){ char buffer4 [2]; in.read(buffer4, 2); header.clipQualLeft = be_int2(*(unsigned short *)(&buffer4)); - header.clipQualLeft = 5; + header.clipQualLeft = 5; //read clip qual right char buffer5 [2]; diff --git a/summarycommand.cpp b/summarycommand.cpp index b086dff..95f4262 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -44,7 +44,7 @@ SummaryCommand::SummaryCommand(string option) { else { //valid paramters for this command - string Array[] = {"label","calc","abund","size","outputdir","inputdir"}; + string Array[] = {"label","calc","abund","size","outputdir","groupmode","inputdir"}; vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); OptionParser parser(option); @@ -94,6 +94,10 @@ SummaryCommand::SummaryCommand(string option) { temp = validParameter.validFile(parameters, "size", false); if (temp == "not found") { temp = "0"; } convert(temp, size); + + temp = validParameter.validFile(parameters, "groupmode", false); if (temp == "not found") { temp = "F"; } + groupMode = isTrue(temp); + } } @@ -108,12 +112,13 @@ void SummaryCommand::help(){ try { m->mothurOut("The summary.single command can only be executed after a successful read.otu WTIH ONE EXECEPTION.\n"); m->mothurOut("The summary.single command can be executed after a successful cluster command. It will use the .list file from the output of the cluster.\n"); - m->mothurOut("The summary.single command parameters are label, calc, abund. No parameters are required.\n"); + m->mothurOut("The summary.single command parameters are label, calc, abund and groupmode. No parameters are required.\n"); m->mothurOut("The summary.single command should be in the following format: \n"); m->mothurOut("summary.single(label=yourLabel, calc=yourEstimators).\n"); m->mothurOut("Example summary.single(label=unique-.01-.03, calc=sobs-chao-ace-jack-bootstrap-shannon-npshannon-simpson).\n"); validCalculator->printCalc("summary", cout); m->mothurOut("The default value calc is sobs-chao-ace-jack-shannon-npshannon-simpson\n"); + m->mothurOut("If you are running summary.single with a shared file and would like your summary results collated in one file, set groupmode=t. (Default=False).\n"); m->mothurOut("The label parameter is used to analyze specific labels in your input.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. label), '=' and parameters (i.e.yourLabels).\n\n"); } @@ -142,8 +147,14 @@ int SummaryCommand::execute(){ if (m->control_pressed) { if (hadShared != "") { globaldata->setSharedFile(hadShared); globaldata->setFormat("sharedfile"); } return 0; } + int numLines = 0; + int numCols = 0; + for (int p = 0; p < inputFileNames.size(); p++) { + numLines = 0; + numCols = 0; + string fileNameRoot = outputDir + getRootName(getSimpleName(inputFileNames[p])) + "summary"; globaldata->inputFileName = inputFileNames[p]; outputNames.push_back(fileNameRoot); @@ -221,9 +232,11 @@ int SummaryCommand::execute(){ for(int i=0;igetCols() == 1){ outputFileHandle << '\t' << sumCalculators[i]->getName(); + numCols++; } else{ outputFileHandle << '\t' << sumCalculators[i]->getName() << "\t" << sumCalculators[i]->getName() << "_lci\t" << sumCalculators[i]->getName() << "_hci"; + numCols += 3; } } outputFileHandle << endl; @@ -254,6 +267,7 @@ int SummaryCommand::execute(){ sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; } if ((anyLabelsToProcess(sabund->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) { @@ -276,6 +290,7 @@ int SummaryCommand::execute(){ sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; //restore real lastlabel to save below sabund->setLabel(saveLabel); @@ -318,6 +333,7 @@ int SummaryCommand::execute(){ sumCalculators[i]->print(outputFileHandle); } outputFileHandle << endl; + numLines++; delete sabund; } @@ -337,6 +353,11 @@ int SummaryCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + //create summary file containing all the groups data for each label - this function just combines the info from the files already created. + if ((hadShared != "") && (groupMode)) { outputNames.push_back(createGroupSummaryFile(numLines, numCols, outputNames)); } + + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { remove(outputNames[i].c_str()); } return 0; } + m->mothurOutEndLine(); m->mothurOut("Output File Names: "); m->mothurOutEndLine(); for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); } @@ -409,3 +430,72 @@ vector SummaryCommand::parseSharedFile(string filename) { } } //********************************************************************************************************************** +string SummaryCommand::createGroupSummaryFile(int numLines, int numCols, vector outputNames) { + try { + + ofstream out; + string combineFileName = outputDir + getRootName(getSimpleName(globaldata->inputFileName)) + "groups.summary"; + + //open combined file + openOutputFile(combineFileName, out); + + //open each groups summary file + string newLabel = ""; + ifstream* temp; + map filehandles; + for (int i=0; i> tempLabel; + + if (j == 1) { newLabel += "group\t" + tempLabel + '\t'; + }else{ newLabel += tempLabel + '\t'; } + } + }else{ for (int j = 0; j < numCols+1; j++) { *(temp) >> tempLabel; } } + + gobble(*(temp)); + } + + //output label line to new file + out << newLabel << endl; + + //for each label + for (int i = 0; i < numLines; i++) { + + //grab summary data for each group + for (int i=0; i> tempLabel; + + //print to combined file + if (j == 1) { out << groups[i] << '\t' << tempLabel << '\t'; } + else{ out << tempLabel << '\t'; } + } + + out << endl; + gobble(*(filehandles[outputNames[i]])); + } + } + + //close each groups summary file + for (int i=0; ierrorOut(e, "SummaryCommand", "createGroupSummaryFile"); + exit(1); + } +} +//********************************************************************************************************************** diff --git a/summarycommand.h b/summarycommand.h index 6f04fc7..6b3fa69 100644 --- a/summarycommand.h +++ b/summarycommand.h @@ -35,7 +35,7 @@ private: SAbundVector* sabund; int abund, size; - bool abort, allLines; + bool abort, allLines, groupMode; set labels; //holds labels to be used string label, calc, outputDir; vector Estimators; @@ -43,6 +43,7 @@ private: vector groups; vector parseSharedFile(string); + string createGroupSummaryFile(int, int, vector); }; diff --git a/taxonomyequalizer.cpp b/taxonomyequalizer.cpp index c7e8cb9..1d2832d 100644 --- a/taxonomyequalizer.cpp +++ b/taxonomyequalizer.cpp @@ -88,19 +88,19 @@ int TaxEqualizer::getHighestLevel(ifstream& in) { //save sequences level seqLevels[name] = thisLevel; - + //is this the longest taxonomy? if (thisLevel > level) { level = thisLevel; testTax = tax; //testTax is used to figure out if this file has confidences we need to strip out - } + } } int pos = testTax.find_first_of('('); //if there are '(' then there are confidences we need to take out if (pos != -1) { containsConfidence = true; } - + return level; } -- 2.39.2