From: Sarah Westcott Date: Tue, 3 Apr 2012 17:36:38 +0000 (-0400) Subject: fixed bug in corr.axes so that it now uses the shared files bin numbers. finished... X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=e41b86a600fe30f5df9507d7e55027e7b8bd7dd6 fixed bug in corr.axes so that it now uses the shared files bin numbers. finished adding subsample and tiers parameters to dist.shared. --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 2088e37..b181f5b 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 45; + objectVersion = 46; objects = { /* Begin PBXBuildFile section */ @@ -1938,10 +1938,11 @@ 08FB7793FE84155DC02AAC07 /* Project object */ = { isa = PBXProject; attributes = { + LastUpgradeCheck = 0420; ORGANIZATIONNAME = "Schloss Lab"; }; buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "Mothur" */; - compatibilityVersion = "Xcode 3.1"; + compatibilityVersion = "Xcode 3.2"; developmentRegion = English; hasScannedForEncodings = 1; knownRegions = ( @@ -2319,7 +2320,6 @@ ALWAYS_SEARCH_USER_PATHS = NO; COPY_PHASE_STRIP = NO; GCC_DYNAMIC_NO_PIC = NO; - GCC_ENABLE_FIX_AND_CONTINUE = YES; GCC_MODEL_TUNING = G5; GCC_OPTIMIZATION_LEVEL = 0; INSTALL_PATH = /usr/local/bin; @@ -2373,7 +2373,6 @@ "-lncurses", "-lreadline", ); - PREBINDING = NO; SDKROOT = macosx10.6; USER_HEADER_SEARCH_PATHS = ""; }; @@ -2411,7 +2410,6 @@ "-lncurses", "-lreadline", ); - PREBINDING = NO; SDKROOT = macosx10.6; }; name = Release; diff --git a/corraxescommand.cpp b/corraxescommand.cpp index a1c3a3d..c27eb4b 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -321,7 +321,7 @@ int CorrAxesCommand::calcPearson(map >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1; } + if (metadatafile == "") { out << m->currentBinLabels[i]; } else { out << metadataLabels[i]; } //find the averages this otu - Y @@ -456,7 +456,7 @@ int CorrAxesCommand::calcSpearman(map >& axes, ofstream& o //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1; } + if (metadatafile == "") { out << m->currentBinLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y @@ -609,7 +609,7 @@ int CorrAxesCommand::calcKendall(map >& axes, ofstream& ou //for each otu for (int i = 0; i < lookupFloat[0]->getNumBins(); i++) { - if (metadatafile == "") { out << i+1; } + if (metadatafile == "") { out << m->currentBinLabels[i]; } else { out << metadataLabels[i]; } //find the ranks of this otu - Y diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index cada2f1..73c38f7 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -425,7 +425,7 @@ int MatrixOutputCommand::execute(){ } } /***********************************************************/ -void MatrixOutputCommand::printSims(ostream& out, vector< vector >& simMatrix) { +void MatrixOutputCommand::printSims(ostream& out, vector< vector >& simMatrix) { try { out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); @@ -464,7 +464,7 @@ int MatrixOutputCommand::process(vector thisLookup){ vector< vector< vector > > calcDistsTotals; //each iter, one for each calc, then each groupCombos dists. this will be used to make .dist files vector< vector > calcDists; calcDists.resize(matrixCalculators.size()); - + for (int thisIter = 0; thisIter < iters; thisIter++) { vector thisItersLookup = thisLookup; @@ -472,9 +472,26 @@ int MatrixOutputCommand::process(vector thisLookup){ if (subsample) { SubSample sample; vector tempLabels; //dont need since we arent printing the sampled sharedRabunds - thisItersLookup = sample.getSamplePreserve(thisLookup, tempLabels, subsampleSize); + + //make copy of lookup so we don't get access violations + vector newLookup; + for (int k = 0; k < thisItersLookup.size(); k++) { + SharedRAbundVector* temp = new SharedRAbundVector(); + temp->setLabel(thisItersLookup[k]->getLabel()); + temp->setGroup(thisItersLookup[k]->getGroup()); + newLookup.push_back(temp); + } + + //for each bin + for (int k = 0; k < thisItersLookup[0]->getNumBins(); k++) { + if (m->control_pressed) { for (int j = 0; j < newLookup.size(); j++) { delete newLookup[j]; } return 0; } + for (int j = 0; j < thisItersLookup.size(); j++) { newLookup[j]->push_back(thisItersLookup[j]->getAbundance(k), thisItersLookup[j]->getGroup()); } + } + + tempLabels = sample.getSample(newLookup, subsampleSize); + thisItersLookup = newLookup; } - cout << thisIter << endl; + if(processors == 1){ driver(thisItersLookup, 0, numGroups, calcDists); }else{ @@ -608,9 +625,11 @@ int MatrixOutputCommand::process(vector thisLookup){ calcDistsTotals.push_back(calcDists); if (subsample) { + //clean up memory - // for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } - // thisItersLookup.clear(); + for (int i = 0; i < thisItersLookup.size(); i++) { delete thisItersLookup[i]; } + thisItersLookup.clear(); + for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); } } } @@ -619,7 +638,7 @@ int MatrixOutputCommand::process(vector thisLookup){ vector< vector > calcAverages; calcAverages.resize(matrixCalculators.size()); for (int i = 0; i < calcAverages.size(); i++) { //initialize sums to zero. - calcAverages[i].resize(calcDists[i].size()); + calcAverages[i].resize(calcDistsTotals[0][i].size()); for (int j = 0; j < calcAverages[i].size(); j++) { calcAverages[i][j].seq1 = calcDists[i][j].seq1; @@ -645,7 +664,7 @@ int MatrixOutputCommand::process(vector thisLookup){ //find standard deviation vector< vector > stdDev; stdDev.resize(matrixCalculators.size()); for (int i = 0; i < stdDev.size(); i++) { //initialize sums to zero. - stdDev[i].resize(calcDists[i].size()); + stdDev[i].resize(calcDistsTotals[0][i].size()); for (int j = 0; j < stdDev[i].size(); j++) { stdDev[i][j].seq1 = calcDists[i][j].seq1; @@ -671,11 +690,11 @@ int MatrixOutputCommand::process(vector thisLookup){ //print results for (int i = 0; i < calcDists.size(); i++) { - vector< vector > matrix; //square matrix to represent the distance + vector< vector > matrix; //square matrix to represent the distance matrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } - vector< vector > stdmatrix; //square matrix to represent the stdDev + vector< vector > stdmatrix; //square matrix to represent the stdDev stdmatrix.resize(thisLookup.size()); for (int k = 0; k < thisLookup.size(); k++) { stdmatrix[k].resize(thisLookup.size(), 0.0); } @@ -692,54 +711,57 @@ int MatrixOutputCommand::process(vector thisLookup){ stdmatrix[column][row] = stdDist; } - string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + ".results"; - outputNames.push_back(distFileName); outputTypes["subsample"].push_back(distFileName); + string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".ave.dist"; + outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); + ofstream outAve; + m->openOutputFile(distFileName, outAve); + outAve.setf(ios::fixed, ios::floatfield); outAve.setf(ios::showpoint); + + printSims(outAve, matrix); + + outAve.close(); + + distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".std.dist"; + outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); + ofstream outSTD; + m->openOutputFile(distFileName, outSTD); + outSTD.setf(ios::fixed, ios::floatfield); outSTD.setf(ios::showpoint); + + printSims(outSTD, stdmatrix); + + outSTD.close(); + + } + }else { + + for (int i = 0; i < calcDists.size(); i++) { + if (m->control_pressed) { break; } + + //initialize matrix + vector< vector > matrix; //square matrix to represent the distance + matrix.resize(thisLookup.size()); + for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } + + for (int j = 0; j < calcDists[i].size(); j++) { + int row = calcDists[i][j].seq1; + int column = calcDists[i][j].seq2; + double dist = calcDists[i][j].dist; + + matrix[row][column] = dist; + matrix[column][row] = dist; + } + + string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist"; + outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); ofstream outDist; m->openOutputFile(distFileName, outDist); outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - outDist << "Group1\tGroup2\tAverageDist\tStdDev\n"; - for (int m = 0; m < matrix.size(); m++) { - for (int n = 0; n < m; n++) { - outDist << lookup[m]->getGroup() << '\t' << lookup[n]->getGroup() << '\t'; - outDist << matrix[m][n] << '\t' << stdmatrix[m][n] << endl; - } - } - outDist.close(); - } - - //output averages as distance matrix - calcDists = calcAverages; - } - - for (int i = 0; i < calcDists.size(); i++) { - if (m->control_pressed) { break; } - - //initialize matrix - vector< vector > matrix; //square matrix to represent the distance - matrix.resize(thisLookup.size()); - for (int k = 0; k < thisLookup.size(); k++) { matrix[k].resize(thisLookup.size(), 0.0); } - - for (int j = 0; j < calcDists[i].size(); j++) { - int row = calcDists[i][j].seq1; - int column = calcDists[i][j].seq2; - float dist = calcDists[i][j].dist; + printSims(outDist, matrix); - matrix[row][column] = dist; - matrix[column][row] = dist; + outDist.close(); } - - string distFileName = outputDir + m->getRootName(m->getSimpleName(sharedfile)) + matrixCalculators[i]->getName() + "." + thisLookup[0]->getLabel() + "." + output + ".dist"; - outputNames.push_back(distFileName); outputTypes["phylip"].push_back(distFileName); - ofstream outDist; - m->openOutputFile(distFileName, outDist); - outDist.setf(ios::fixed, ios::floatfield); outDist.setf(ios::showpoint); - - printSims(outDist, matrix); - - outDist.close(); } - return 0; } @@ -751,7 +773,6 @@ int MatrixOutputCommand::process(vector thisLookup){ /**************************************************************************************************/ int MatrixOutputCommand::driver(vector thisLookup, int start, int end, vector< vector >& calcDists) { try { - vector subset; for (int k = start; k < end; k++) { // pass cdd each set of groups to compare diff --git a/matrixoutputcommand.h b/matrixoutputcommand.h index f915dfc..8af539b 100644 --- a/matrixoutputcommand.h +++ b/matrixoutputcommand.h @@ -88,7 +88,7 @@ private: }; vector lines; - void printSims(ostream&, vector< vector >&); + void printSims(ostream&, vector< vector >&); int process(vector); vector matrixCalculators; diff --git a/mothur.h b/mothur.h index 1a07b6f..2c143e8 100644 --- a/mothur.h +++ b/mothur.h @@ -131,9 +131,9 @@ struct clusterNode { struct seqDist { int seq1; int seq2; - float dist; + double dist; seqDist() {} - seqDist(int s1, int s2, float d) : seq1(s1), seq2(s2), dist(d) {} + seqDist(int s1, int s2, double d) : seq1(s1), seq2(s2), dist(d) {} ~seqDist() {} }; /************************************************************/ diff --git a/sequenceparser.cpp b/sequenceparser.cpp index 6c98c04..fd94b24 100644 --- a/sequenceparser.cpp +++ b/sequenceparser.cpp @@ -316,6 +316,7 @@ int SequenceParser::getSeqs(string g, string filename, bool uchimeFormat=false){ } }else { + //m->mothurOut("Group " + g + " contains " + toString(seqForThisGroup.size()) + " unique seqs.\n"); for (int i = 0; i < seqForThisGroup.size(); i++) { if(m->control_pressed) { out.close(); m->mothurRemove(filename); return 1; } diff --git a/subsample.cpp b/subsample.cpp index d5b4e3e..e6dd845 100644 --- a/subsample.cpp +++ b/subsample.cpp @@ -8,66 +8,6 @@ #include "subsample.h" -//********************************************************************************************************************** -vector SubSample::getSamplePreserve(vector& thislookup, vector& newLabels, int size) { - try { - - vector newlookup; newlookup.resize(thislookup.size(), NULL); - - //save mothurOut's binLabels to restore for next label - vector saveBinLabels = m->currentBinLabels; - - int numBins = thislookup[0]->getNumBins(); - for (int i = 0; i < thislookup.size(); i++) { - int thisSize = thislookup[i]->getNumSeqs(); - - if (thisSize != size) { - - string thisgroup = thislookup[i]->getGroup(); - - OrderVector order; - for(int p=0;pgetAbundance(p);j++){ - order.push_back(p); - } - } - random_shuffle(order.begin(), order.end()); - - SharedRAbundVector* temp = new SharedRAbundVector(numBins); - temp->setLabel(thislookup[i]->getLabel()); - temp->setGroup(thislookup[i]->getGroup()); - - newlookup[i] = temp; - - for (int j = 0; j < size; j++) { - - if (m->control_pressed) { return newlookup; } - - int bin = order.get(j); - - int abund = newlookup[i]->getAbundance(bin); - newlookup[i]->set(bin, (abund+1), thisgroup); - } - } - } - - //subsampling may have created some otus with no sequences in them - eliminateZeroOTUS(newlookup); - - if (m->control_pressed) { return newlookup; } - - //save mothurOut's binLabels to restore for next label - newLabels = m->currentBinLabels; - m->currentBinLabels = saveBinLabels; - - return newlookup; - - } - catch(exception& e) { - m->errorOut(e, "SubSample", "getSamplePreserve"); - exit(1); - } -} //********************************************************************************************************************** vector SubSample::getSample(vector& thislookup, int size) { try { diff --git a/subsample.h b/subsample.h index 9156e09..09c7dcd 100644 --- a/subsample.h +++ b/subsample.h @@ -21,9 +21,8 @@ class SubSample { SubSample() { m = MothurOut::getInstance(); } ~SubSample() {} - vector getSample(vector&, int); //returns the bin labels for the subsample, mothurOuts binlabels are preserved so you can run this multiple times. + vector getSample(vector&, int); //returns the bin labels for the subsample, mothurOuts binlabels are preserved so you can run this multiple times. Overwrites original vector passed in, if you need to preserve it deep copy first. - vector getSamplePreserve(vector&, vector&, int); private: