From 55386dddad84cc1140d736cabaf4dd0ae16f2e01 Mon Sep 17 00:00:00 2001 From: westcott Date: Mon, 22 Aug 2011 19:10:08 +0000 Subject: [PATCH] added multiple processors option for Windows users to align.seqs, dist.seqs, summary.seqs. forced mothurOut, groupmap and treemap groups to be private so we can force the order to be sorted --- Mothur.xcodeproj/project.pbxproj | 2 +- aligncommand.cpp | 24 ++-- aligncommand.h | 7 +- alignmentdb.cpp | 9 +- alignmentdb.h | 4 +- bayesian.cpp | 3 +- bayesian.h | 2 +- blastdb.cpp | 43 ++++-- blastdb.hpp | 9 +- bootstrapsharedcommand.cpp | 2 +- chimeracheckrdp.cpp | 2 +- chimeraslayer.cpp | 10 +- chimeraslayer.h | 6 +- chimeraslayercommand.cpp | 188 +++++++++++++++---------- chimeraslayercommand.h | 232 +++++++++++++++++++++++++++++++ classify.cpp | 4 +- classify.h | 1 + classifyseqscommand.cpp | 6 +- clustersplitcommand.cpp | 9 +- collect.cpp | 20 +-- collectcommand.cpp | 10 +- collectdisplay.h | 7 +- collectsharedcommand.cpp | 16 ++- corraxescommand.cpp | 9 +- countgroupscommand.cpp | 9 +- countseqscommand.cpp | 3 +- deuniquetreecommand.cpp | 6 +- engine.cpp | 12 +- getgroupscommand.cpp | 8 +- getoturepcommand.cpp | 6 +- getotuscommand.cpp | 4 +- getrelabundcommand.cpp | 10 +- getsharedotucommand.cpp | 12 +- groupmap.cpp | 14 +- groupmap.h | 9 +- heatmapcommand.cpp | 14 +- heatmapsim.cpp | 2 +- heatmapsimcommand.cpp | 14 +- indicatorcommand.cpp | 31 +++-- knn.cpp | 4 +- knn.h | 2 +- libshuffcommand.cpp | 52 +++---- matrixoutputcommand.cpp | 14 +- mergegroupscommand.cpp | 10 +- metastatscommand.cpp | 15 +- mothurout.h | 17 ++- normalizesharedcommand.cpp | 28 ++-- parselistscommand.cpp | 28 ++-- parsimony.cpp | 18 +-- parsimonycommand.cpp | 34 +++-- pcacommand.cpp | 4 +- phylodiversitycommand.cpp | 64 +++++---- phylosummary.cpp | 33 +++-- rarefactcommand.cpp | 4 +- rarefactsharedcommand.cpp | 12 +- readtree.cpp | 2 +- removegroupscommand.cpp | 21 +-- removeotuscommand.cpp | 3 +- removerarecommand.cpp | 5 +- sharedcommand.cpp | 88 +++++++++--- sharedcommand.h | 1 + sharedjackknife.cpp | 2 +- sharedlistvector.cpp | 19 +-- sharedordervector.cpp | 19 ++- sharedrabundfloatvector.cpp | 15 +- sharedrabundvector.cpp | 18 ++- sharedutilities.cpp | 12 ++ splitabundcommand.cpp | 2 +- splitgroupscommand.cpp | 3 +- subsamplecommand.cpp | 15 +- summarysharedcommand.cpp | 12 +- tree.cpp | 8 +- treegroupscommand.cpp | 13 +- treemap.h | 7 +- unifracunweightedcommand.cpp | 36 ++--- unifracweightedcommand.cpp | 36 ++--- unweighted.cpp | 28 ++-- venncommand.cpp | 10 +- weighted.cpp | 4 +- 79 files changed, 993 insertions(+), 504 deletions(-) diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index 73a36ad..c9681af 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -1629,8 +1629,8 @@ A7E9B80512D37EC400DA6239 /* sharedlistvector.h */, A7E9B80E12D37EC400DA6239 /* sharedordervector.h */, A7E9B80D12D37EC400DA6239 /* sharedordervector.cpp */, - A7E9B80F12D37EC400DA6239 /* sharedrabundfloatvector.cpp */, A7E9B81012D37EC400DA6239 /* sharedrabundfloatvector.h */, + A7E9B80F12D37EC400DA6239 /* sharedrabundfloatvector.cpp */, A7E9B81112D37EC400DA6239 /* sharedrabundvector.cpp */, A7E9B81212D37EC400DA6239 /* sharedrabundvector.h */, A7E9B81312D37EC400DA6239 /* sharedsabundvector.cpp */, diff --git a/aligncommand.cpp b/aligncommand.cpp index 87ceb2c..05ecd46 100644 --- a/aligncommand.cpp +++ b/aligncommand.cpp @@ -302,7 +302,7 @@ int AlignCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch); + templateDB = new AlignmentDB(templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, rand()); for (int s = 0; s < candidateFileNames.size(); s++) { if (m->control_pressed) { outputTypes.clear(); return 0; } @@ -431,14 +431,18 @@ int AlignCommand::execute(){ positions = m->divideFile(candidateFileNames[s], processors); for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } #else - positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); - - //figure out how many sequences you have to process - int numSeqsPerProcessor = numFastaSeqs / processors; - for (int i = 0; i < processors; i++) { - int startIndex = i * numSeqsPerProcessor; - if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } - lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); + if (processors == 1) { + lines.push_back(new linePair(0, 1000)); + }else { + positions = m->setFilePosFasta(candidateFileNames[s], numFastaSeqs); + + //figure out how many sequences you have to process + int numSeqsPerProcessor = numFastaSeqs / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numSeqsPerProcessor; + if(i == (processors - 1)){ numSeqsPerProcessor = numFastaSeqs - i * numSeqsPerProcessor; } + lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); + } } #endif @@ -919,7 +923,7 @@ int AlignCommand::createProcesses(string alignFileName, string reportFileName, s string extension = ""; if (i != 0) { extension = toString(i) + ".temp"; } - alignData* tempalign = new alignData((alignFileName + extension), (reportFileName + extension), (accnosFName + extension), filename, align, search, kmerSize, m, lines[i]->start, lines[i]->end, flip, match, misMatch, gapOpen, gapExtend, threshold); + alignData* tempalign = new alignData((alignFileName + extension), (reportFileName + extension), (accnosFName + extension), filename, align, search, kmerSize, m, lines[i]->start, lines[i]->end, flip, match, misMatch, gapOpen, gapExtend, threshold, i); pDataArray.push_back(tempalign); processIDS.push_back(i); diff --git a/aligncommand.h b/aligncommand.h index 193eeac..4a5eb11 100644 --- a/aligncommand.h +++ b/aligncommand.h @@ -91,10 +91,10 @@ typedef struct alignData { MothurOut* m; //AlignmentDB* templateDB; float match, misMatch, gapOpen, gapExtend, threshold; - int count, kmerSize; + int count, kmerSize, threadID; alignData(){} - alignData(string a, string r, string ac, string f, string al, string se, int ks, MothurOut* mout, unsigned long int st, unsigned long int en, bool fl, float ma, float misMa, float gapO, float gapE, float thr) { + alignData(string a, string r, string ac, string f, string al, string se, int ks, MothurOut* mout, unsigned long int st, unsigned long int en, bool fl, float ma, float misMa, float gapO, float gapE, float thr, int tid) { alignFName = a; reportFName = r; accnosFName = ac; @@ -113,6 +113,7 @@ typedef struct alignData { search = se; count = 0; kmerSize = ks; + threadID = tid; } }; @@ -144,7 +145,7 @@ static DWORD WINAPI MyAlignThreadFunction(LPVOID lpParam){ pDataArray->count = pDataArray->end; - AlignmentDB* templateDB = new AlignmentDB("saved-silent", pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch); + AlignmentDB* templateDB = new AlignmentDB("saved-silent", pDataArray->search, pDataArray->kmerSize, pDataArray->gapOpen, pDataArray->gapExtend, pDataArray->match, pDataArray->misMatch, pDataArray->threadID); //moved this into driver to avoid deep copies in windows paralellized version Alignment* alignment; diff --git a/alignmentdb.cpp b/alignmentdb.cpp index 6ccfd39..57975d6 100644 --- a/alignmentdb.cpp +++ b/alignmentdb.cpp @@ -15,7 +15,7 @@ /**************************************************************************************************/ //deep copy -AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence) { +AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest(adb.longest), method(adb.method), emptySequence(adb.emptySequence), threadID(adb.threadID) { try { m = MothurOut::getInstance(); @@ -41,7 +41,7 @@ AlignmentDB::AlignmentDB(const AlignmentDB& adb) : numSeqs(adb.numSeqs), longest } /**************************************************************************************************/ -AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch){ // This assumes that the template database is in fasta format, may +AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int tid){ // This assumes that the template database is in fasta format, may try { // need to alter this in the future? m = MothurOut::getInstance(); longest = 0; @@ -49,6 +49,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap bool needToGenerate = true; ReferenceDB* rdb = ReferenceDB::getInstance(); bool silent = false; + threadID = tid; if (fastaFileName == "saved-silent") { fastaFileName = "saved"; silent = true; @@ -189,7 +190,7 @@ AlignmentDB::AlignmentDB(string fastaFileName, string s, int kmerSize, float gap #endif } else if(method == "suffix") { search = new SuffixDB(numSeqs); } - else if(method == "blast") { search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); } + else if(method == "blast") { search = new BlastDB(fastaFileName.substr(0,fastaFileName.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } else { method = "kmer"; m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); @@ -231,7 +232,7 @@ AlignmentDB::AlignmentDB(string s){ method = s; if(method == "suffix") { search = new SuffixDB(); } - else if(method == "blast") { search = new BlastDB(""); } + else if(method == "blast") { search = new BlastDB("", 0); } else { search = new KmerDB(); } diff --git a/alignmentdb.h b/alignmentdb.h index 27d0434..900aadc 100644 --- a/alignmentdb.h +++ b/alignmentdb.h @@ -20,7 +20,7 @@ class AlignmentDB { public: - AlignmentDB(string, string, int, float, float, float, float); //reads fastafile passed in and stores sequences + AlignmentDB(string, string, int, float, float, float, float, int); //reads fastafile passed in and stores sequences AlignmentDB(string); AlignmentDB(const AlignmentDB& adb); ~AlignmentDB(); @@ -30,7 +30,7 @@ public: int getLongestBase() { return longest; } private: - int numSeqs, longest; + int numSeqs, longest, threadID; string method; Database* search; diff --git a/bayesian.cpp b/bayesian.cpp index 1d5c46b..a12afed 100644 --- a/bayesian.cpp +++ b/bayesian.cpp @@ -12,11 +12,12 @@ #include "phylosummary.h" #include "referencedb.h" /**************************************************************************************************/ -Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i) : +Bayesian::Bayesian(string tfile, string tempFile, string method, int ksize, int cutoff, int i, int tid) : Classify(), kmerSize(ksize), confidenceThreshold(cutoff), iters(i) { try { ReferenceDB* rdb = ReferenceDB::getInstance(); + threadID = tid; string baseName = tempFile; if (baseName == "saved") { baseName = rdb->getSavedReference(); } diff --git a/bayesian.h b/bayesian.h index a1f693b..1cf5145 100644 --- a/bayesian.h +++ b/bayesian.h @@ -18,7 +18,7 @@ class Bayesian : public Classify { public: - Bayesian(string, string, string, int, int, int); + Bayesian(string, string, string, int, int, int, int); ~Bayesian(); string getTaxonomy(Sequence*); diff --git a/blastdb.cpp b/blastdb.cpp index af15720..bc646df 100644 --- a/blastdb.cpp +++ b/blastdb.cpp @@ -14,19 +14,23 @@ /**************************************************************************************************/ -BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM, string b) : Database(), +BlastDB::BlastDB(string tag, float gO, float gE, float mm, float mM, string b, int tid) : Database(), gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { try { count = 0; path = b; + threadID = tid; int randNumber = rand(); //int randNumber = 12345; string pid = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); + pid += getpid(); +#else + pid += toString(threadID); #endif + dbFileName = tag + pid + toString(randNumber) + ".template.unaligned.fasta"; queryFileName = tag + pid + toString(randNumber) + ".candidate.unaligned.fasta"; blastFileName = tag + pid + toString(randNumber) + ".blast"; @@ -96,11 +100,12 @@ gapOpen(gO), gapExtend(gE), match(mm), misMatch(mM) { } /**************************************************************************************************/ -BlastDB::BlastDB(string b) : Database() { +BlastDB::BlastDB(string b, int tid) : Database() { try { count = 0; path = b; + threadID = tid; //make sure blast exists in the write place if (path == "") { @@ -119,7 +124,9 @@ BlastDB::BlastDB(string b) : Database() { int randNumber = rand(); string pid = ""; #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); + pid += getpid(); +#else + pid += toString(threadID); #endif dbFileName = pid + toString(randNumber) + ".template.unaligned.fasta"; @@ -206,10 +213,7 @@ vector BlastDB::findClosestSequences(Sequence* seq, int n) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -273,10 +277,7 @@ vector BlastDB::findClosestMegaBlast(Sequence* seq, int n, int minPerID) { ofstream queryFile; int randNumber = rand(); - string pid = ""; -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - pid += toString(getpid()); -#endif + string pid = scrubName(seq->getName()); m->openOutputFile((queryFileName+pid+toString(randNumber)), queryFile); queryFile << '>' << seq->getName() << endl; @@ -382,6 +383,24 @@ void BlastDB::generateDB() { } } /**************************************************************************************************/ +string BlastDB::scrubName(string seqName) { + try { + + string cleanName = ""; + + for (int i = 0; i < seqName.length(); i++) { + if (isalnum(seqName[i])) { cleanName += seqName[i]; } + else { cleanName += "_"; } + } + + return cleanName; + } + catch(exception& e) { + m->errorOut(e, "BlastDB", "scrubName"); + exit(1); + } +} +/**************************************************************************************************/ /**************************************************************************************************/ diff --git a/blastdb.hpp b/blastdb.hpp index 0c3ac4b..e2f4f57 100644 --- a/blastdb.hpp +++ b/blastdb.hpp @@ -16,8 +16,8 @@ class BlastDB : public Database { public: - BlastDB(string, float, float, float, float, string); - BlastDB(string); + BlastDB(string, float, float, float, float, string, int); + BlastDB(string, int); BlastDB(const BlastDB& bdb) : dbFileName(bdb.dbFileName), queryFileName(bdb.queryFileName), blastFileName(bdb.blastFileName), path(bdb.path), count(bdb.count), gapOpen(bdb.gapOpen), gapExtend(bdb.gapExtend), match(bdb.match), misMatch(bdb.misMatch), Database(bdb) {} ~BlastDB(); @@ -28,12 +28,15 @@ public: vector findClosestMegaBlast(Sequence*, int, int); private: + + string scrubName(string); + string dbFileName; string queryFileName; string blastFileName; string path; - int count; + int count, threadID; float gapOpen; float gapExtend; float match; diff --git a/bootstrapsharedcommand.cpp b/bootstrapsharedcommand.cpp index 783016a..d249421 100644 --- a/bootstrapsharedcommand.cpp +++ b/bootstrapsharedcommand.cpp @@ -140,7 +140,7 @@ BootSharedCommand::BootSharedCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } calc = validParameter.validFile(parameters, "calc", false); diff --git a/chimeracheckrdp.cpp b/chimeracheckrdp.cpp index be59315..42b5312 100644 --- a/chimeracheckrdp.cpp +++ b/chimeracheckrdp.cpp @@ -20,7 +20,7 @@ ChimeraCheckRDP::ChimeraCheckRDP(string filename, string temp, string n, bool s, kmerSize = k; outputDir = o; - templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0); + templateDB = new AlignmentDB(templateFileName, "kmer", kmerSize, 0.0,0.0,0.0,0.0, rand()); m->mothurOutEndLine(); kmer = new Kmer(kmerSize); diff --git a/chimeraslayer.cpp b/chimeraslayer.cpp index 9da0d3c..b637000 100644 --- a/chimeraslayer.cpp +++ b/chimeraslayer.cpp @@ -14,7 +14,7 @@ //*************************************************************************************************************** ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, string mode, int k, int ms, int mms, int win, float div, -int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera() { +int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { try { fastafile = file; templateFileName = temp; templateSeqs = readSeqs(temp); @@ -36,6 +36,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num trimChimera = trim; numNoParents = 0; blastlocation = blas; + threadID = tid; doPrep(); } @@ -47,7 +48,7 @@ int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int num //*************************************************************************************************************** //template=self ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, map& prior, string mode, int k, int ms, int mms, int win, float div, - int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas) : Chimera() { + int minsim, int mincov, int minbs, int minsnp, int par, int it, int inc, int numw, bool r, string blas, int tid) : Chimera() { try { fastafile = file; templateSeqs = readSeqs(fastafile); templateFileName = temp; @@ -70,6 +71,7 @@ ChimeraSlayer::ChimeraSlayer(string file, string temp, bool trim, mapgetRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(fastafile)), -1.0, -1.0, 1, -3, blastlocation, threadID); if (m->control_pressed) { return 0; } @@ -332,7 +334,7 @@ vector ChimeraSlayer::getTemplate(Sequence q, vector& user }else if (searchMethod == "blast") { //generate blastdb - databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation); + databaseLeft = new BlastDB(m->getRootName(m->getSimpleName(templateFileName)), -1.0, -1.0, 1, -3, blastlocation, threadID); if (m->control_pressed) { return userTemplate; } diff --git a/chimeraslayer.h b/chimeraslayer.h index ef7b3c3..c2815a1 100644 --- a/chimeraslayer.h +++ b/chimeraslayer.h @@ -24,8 +24,8 @@ class ChimeraSlayer : public Chimera { public: - ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string); - ChimeraSlayer(string, string, bool, map&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string); + ChimeraSlayer(string, string, bool, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int); + ChimeraSlayer(string, string, bool, map&, string, int, int, int, int, float, int, int, int, int, int, int, int, int, bool, string, int); ~ChimeraSlayer(); @@ -50,7 +50,7 @@ class ChimeraSlayer : public Chimera { Database* databaseLeft; map priority; //for template=self, seqname, seqAligned, abundance set chimericSeqs; //for template=self, so we don't add chimeric sequences to the userTemplate set - int numNoParents; + int numNoParents, threadID; vector chimeraResults; data_results printResults; diff --git a/chimeraslayercommand.cpp b/chimeraslayercommand.cpp index 7c35453..0732bc2 100644 --- a/chimeraslayercommand.cpp +++ b/chimeraslayercommand.cpp @@ -8,7 +8,6 @@ */ #include "chimeraslayercommand.h" -#include "chimeraslayer.h" #include "deconvolutecommand.h" #include "referencedb.h" @@ -470,9 +469,7 @@ int ChimeraSlayerCommand::execute(){ int start = time(NULL); - if (templatefile != "self") { //you want to run slayer with a refernce template - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation); - }else { + if (templatefile == "self") { if (processors != 1) { m->mothurOut("When using template=self, mothur can only use 1 processor, continuing."); m->mothurOutEndLine(); processors = 1; } string nameFile = ""; if (nameFileNames.size() != 0) { //you provided a namefile and we don't need to create one @@ -484,7 +481,7 @@ int ChimeraSlayerCommand::execute(){ string inputString = "fasta=" + fastaFileNames[s]; m->mothurOut("/******************************************/"); m->mothurOutEndLine(); m->mothurOut("Running command: unique.seqs(" + inputString + ")"); m->mothurOutEndLine(); - + Command* uniqueCommand = new DeconvoluteCommand(inputString); uniqueCommand->execute(); @@ -500,19 +497,25 @@ int ChimeraSlayerCommand::execute(){ //sort fastafile by abundance, returns new sorted fastafile name m->mothurOut("Sorting fastafile according to abundance..."); cout.flush(); - map priority = sortFastaFile(fastaFileNames[s], nameFile); + priority = sortFastaFile(fastaFileNames[s], nameFile); m->mothurOut("Done."); m->mothurOutEndLine(); if (m->control_pressed) { for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } - - chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation); } - + if (outputDir == "") { outputDir = m->hasPath(fastaFileNames[s]); }//if user entered a file with a path then preserve it string outputFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimera"; string accnosFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.accnos"; string trimFastaFileName = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.fasta"; + //create chimera here if you are mac or linux because fork will copy for you. Create in create processes instead if you are windows. + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + if (templatefile != "self") { //you want to run slayer with a reference template + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + }else { + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + } + if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } if (chimera->getUnaligned()) { @@ -521,6 +524,24 @@ int ChimeraSlayerCommand::execute(){ return 0; } templateSeqsLength = chimera->getLength(); + #else + if (processors == 1) { + if (templatefile != "self") { //you want to run slayer with a reference template + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + }else { + chimera = new ChimeraSlayer(fastaFileNames[s], templatefile, trim, priority, search, ksize, match, mismatch, window, divR, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, realign, blastlocation, rand()); + } + + if (m->control_pressed) { delete chimera; for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } return 0; } + + if (chimera->getUnaligned()) { + m->mothurOut("Your template sequences are different lengths, please correct."); m->mothurOutEndLine(); + delete chimera; + return 0; + } + templateSeqsLength = chimera->getLength(); + } + #endif #ifdef USE_MPI int pid, numSeqsPerProcessor; @@ -637,79 +658,67 @@ int ChimeraSlayerCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); //make everyone wait - just in case #else - ofstream outHeader; - string tempHeader = outputDir + m->getRootName(m->getSimpleName(fastaFileNames[s])) + "slayer.chimeras.tempHeader"; - m->openOutputFile(tempHeader, outHeader); - - chimera->printHeader(outHeader); - outHeader.close(); - - vector positions = m->divideFile(fastaFileNames[s], processors); - - for (int i = 0; i < (positions.size()-1); i++) { - lines.push_back(new linePair(positions[i], positions[(i+1)])); - } - //break up file - #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) - if(processors == 1){ - numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); - - int numNoParents = chimera->getNumNoParents(); - if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } - - if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } - - }else{ - processIDS.resize(0); - - numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); + vector positions; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + positions = m->divideFile(fastaFileNames[s], processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(new linePair(positions[i], positions[(i+1)])); } +#else + if (processors == 1) { + lines.push_back(new linePair(0, 1000)); + }else { + positions = m->setFilePosFasta(fastaFileNames[s], numSeqs); - rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); - rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str()); - if (trim) { rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); } - - //append output files - for(int i=1;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); - m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); - } - - //append output files - for(int i=1;iappendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName); - m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp")); - } - - if (trim) { - for(int i=1;iappendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName); - m->mothurRemove((trimFastaFileName + toString(processIDS[i]) + ".temp")); - } - } - - if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + //figure out how many sequences you have to process + int numSeqsPerProcessor = numSeqs / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numSeqsPerProcessor; + if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } + lines.push_back(new linePair(positions[startIndex], numSeqsPerProcessor)); } - - #else + } +#endif + + if(processors == 1){ numSeqs = driver(lines[0], outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); int numNoParents = chimera->getNumNoParents(); if (numNoParents == numSeqs) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } - - if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(tempHeader); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } - #endif + }else{ + processIDS.resize(0); + + numSeqs = createProcesses(outputFileName, fastaFileNames[s], accnosFileName, trimFastaFileName); - m->appendFiles(outputFileName, tempHeader); - - m->mothurRemove(outputFileName); - rename(tempHeader.c_str(), outputFileName.c_str()); + rename((outputFileName + toString(processIDS[0]) + ".temp").c_str(), outputFileName.c_str()); + rename((accnosFileName + toString(processIDS[0]) + ".temp").c_str(), accnosFileName.c_str()); + if (trim) { rename((trimFastaFileName + toString(processIDS[0]) + ".temp").c_str(), trimFastaFileName.c_str()); } + + //append output files + for(int i=1;iappendFiles((outputFileName + toString(processIDS[i]) + ".temp"), outputFileName); + m->mothurRemove((outputFileName + toString(processIDS[i]) + ".temp")); + + m->appendFiles((accnosFileName + toString(processIDS[i]) + ".temp"), accnosFileName); + m->mothurRemove((accnosFileName + toString(processIDS[i]) + ".temp")); + + if (trim) { + m->appendFiles((trimFastaFileName + toString(processIDS[i]) + ".temp"), trimFastaFileName); + m->mothurRemove((trimFastaFileName + toString(processIDS[i]) + ".temp")); + } + } + + if (m->control_pressed) { outputTypes.clear(); if (trim) { m->mothurRemove(trimFastaFileName); } m->mothurRemove(outputFileName); m->mothurRemove(accnosFileName); for (int j = 0; j < outputNames.size(); j++) { m->mothurRemove(outputNames[j]); } for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); delete chimera; return 0; } + } + #endif - delete chimera; + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + delete chimera; + #endif for (int i = 0; i < lines.size(); i++) { delete lines[i]; } lines.clear(); @@ -764,6 +773,8 @@ int ChimeraSlayerCommand::driver(linePair* filePos, string outputFName, string f m->openInputFile(filename, inFASTA); inFASTA.seekg(filePos->start); + + if (filePos->start == 0) { chimera->printHeader(out); } bool done = false; int count = 0; @@ -993,10 +1004,11 @@ int ChimeraSlayerCommand::driverMPI(int start, int num, MPI_File& inMPI, MPI_Fil int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename, string accnos, string fasta) { try { -#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) int process = 0; int num = 0; + int numNoParents = 0; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) //loop through and create all the processes you want while (process != processors) { int pid = fork(); @@ -1027,7 +1039,6 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename wait(&temp); } - int numNoParents = 0; for (int i = 0; i < processIDS.size(); i++) { ifstream in; string tempFile = outputFileName + toString(processIDS[i]) + ".num.temp"; @@ -1035,11 +1046,42 @@ int ChimeraSlayerCommand::createProcesses(string outputFileName, string filename if (!in.eof()) { int tempNum = 0; int tempNumParents = 0; in >> tempNum >> tempNumParents; num += tempNum; numNoParents += tempNumParents; } in.close(); m->mothurRemove(tempFile); } +#else - if (num == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the slayerData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors]; + HANDLE hThreadArray[processors]; + //Create processor worker threads. + for( int i=0; istart, lines[i]->end, ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted, divR, priority, i); + pDataArray.push_back(tempslayer); + processIDS.push_back(i); + + //MySeqSumThreadFunction is in header. It must be global or static to work with the threads. + //default security attributes, thread function name, argument to thread function, use default creation flags, returns the thread identifier + hThreadArray[i] = CreateThread(NULL, 0, MySlayerThreadFunction, pDataArray[i], 0, &dwThreadIdArray[i]); + } + + //Wait until all threads have terminated. + WaitForMultipleObjects(processors, hThreadArray, TRUE, INFINITE); + + //Close all thread handles and free memory allocations. + for(int i=0; i < pDataArray.size(); i++){ + num += pDataArray[i]->count; + numNoParents += pDataArray[i]->numNoParents; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + if (num == numNoParents) { m->mothurOut("[WARNING]: megablast returned 0 potential parents for all your sequences. This could be due to formatdb.exe not being setup properly, please check formatdb.log for errors."); m->mothurOutEndLine(); } return num; -#endif } catch(exception& e) { m->errorOut(e, "ChimeraSlayerCommand", "createProcesses"); diff --git a/chimeraslayercommand.h b/chimeraslayercommand.h index 1b17391..a54b24f 100644 --- a/chimeraslayercommand.h +++ b/chimeraslayercommand.h @@ -13,6 +13,7 @@ #include "mothur.h" #include "command.hpp" #include "chimera.h" +#include "chimeraslayer.h" /***********************************************************/ @@ -42,6 +43,7 @@ private: vector processIDS; //processid vector lines; + map priority; int driver(linePair*, string, string, string, string); int createProcesses(string, string, string, string); @@ -66,6 +68,236 @@ private: /***********************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +typedef struct slayerData { + string outputFName; + string fasta; + string accnos; + string filename; + string templatefile; + string search; + string blastlocation; + bool trimera; + bool trim, realign; + unsigned long int start; + unsigned long int end; + int ksize, match, mismatch, window, minSimilarity, minCoverage, minBS, minSNP, parents, iters, increment, numwanted; + MothurOut* m; + float divR; + map priority; + int count; + int numNoParents; + int threadId; + + slayerData(){} + slayerData(string o, string fa, string ac, string f, string te, string se, string bl, bool tri, bool trm, bool re, MothurOut* mout, unsigned long int st, unsigned long int en, int ks, int ma, int mis, int win, int minS, int minC, int miBS, int minSN, int par, int it, int inc, int numw, float div, map prior, int tid) { + outputFName = o; + fasta = fa; + accnos = ac; + filename = f; + templatefile = te; + search = se; + blastlocation = bl; + trimera = tri; + trim = trm; + realign = re; + m = mout; + start = st; + end = en; + ksize = ks; + match = ma; + mismatch = mis; + window = win; + minSimilarity = minS; + minCoverage = minC; + minBS = miBS; + minSNP = minSN; + parents = par; + iters = it; + increment = inc; + numwanted = numw; + divR = div; + priority = prior; + threadId = tid; + count = 0; + numNoParents = 0; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MySlayerThreadFunction(LPVOID lpParam){ + slayerData* pDataArray; + pDataArray = (slayerData*)lpParam; + + try { + ofstream out; + pDataArray->m->openOutputFile(pDataArray->outputFName, out); + + ofstream out2; + pDataArray->m->openOutputFile(pDataArray->accnos, out2); + + ofstream out3; + if (pDataArray->trim) { pDataArray->m->openOutputFile(pDataArray->fasta, out3); } + + ifstream inFASTA; + pDataArray->m->openInputFile(pDataArray->filename, inFASTA); + + + + Chimera* chimera; + if (pDataArray->templatefile != "self") { //you want to run slayer with a reference template + chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId); + }else { + chimera = new ChimeraSlayer(pDataArray->filename, pDataArray->templatefile, pDataArray->trim, pDataArray->priority, pDataArray->search, pDataArray->ksize, pDataArray->match, pDataArray->mismatch, pDataArray->window, pDataArray->divR, pDataArray->minSimilarity, pDataArray->minCoverage, pDataArray->minBS, pDataArray->minSNP, pDataArray->parents, pDataArray->iters, pDataArray->increment, pDataArray->numwanted, pDataArray->realign, pDataArray->blastlocation, pDataArray->threadId); + } + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + chimera->printHeader(out); + inFASTA.seekg(0); + }else { //this accounts for the difference in line endings. + inFASTA.seekg(pDataArray->start-1); pDataArray->m->gobble(inFASTA); + } + + pDataArray->count = pDataArray->end; + + if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 0; } + + if (chimera->getUnaligned()) { + pDataArray->m->mothurOut("Your template sequences are different lengths, please correct."); pDataArray->m->mothurOutEndLine(); + out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); + delete chimera; + return 0; + } + int templateSeqsLength = chimera->getLength(); + + if (pDataArray->start == 0) { chimera->printHeader(out); } + + int count = 0; + for(int i = 0; i < pDataArray->end; i++){ + + if (pDataArray->m->control_pressed) { out.close(); out2.close(); if (pDataArray->trim) { out3.close(); } inFASTA.close(); delete chimera; return 1; } + + Sequence* candidateSeq = new Sequence(inFASTA); pDataArray->m->gobble(inFASTA); + string candidateAligned = candidateSeq->getAligned(); + + if (candidateSeq->getName() != "") { //incase there is a commented sequence at the end of a file + if (candidateSeq->getAligned().length() != templateSeqsLength) { + pDataArray->m->mothurOut(candidateSeq->getName() + " is not the same length as the template sequences. Skipping."); pDataArray->m->mothurOutEndLine(); + }else{ + //find chimeras + chimera->getChimeras(candidateSeq); + + if (pDataArray->m->control_pressed) { delete candidateSeq; delete chimera; return 1; } + + //if you are not chimeric, then check each half + data_results wholeResults = chimera->getResults(); + + //determine if we need to split + bool isChimeric = false; + + if (wholeResults.flag == "yes") { + string chimeraFlag = "no"; + if( (wholeResults.results[0].bsa >= pDataArray->minBS && wholeResults.results[0].divr_qla_qrb >= pDataArray->divR) + || + (wholeResults.results[0].bsb >= pDataArray->minBS && wholeResults.results[0].divr_qlb_qra >= pDataArray->divR) ) { chimeraFlag = "yes"; } + + + if (chimeraFlag == "yes") { + if ((wholeResults.results[0].bsa >= pDataArray->minBS) || (wholeResults.results[0].bsb >= pDataArray->minBS)) { isChimeric = true; } + } + } + + if ((!isChimeric) && pDataArray->trimera) { + + //split sequence in half by bases + string leftQuery, rightQuery; + Sequence tempSeq(candidateSeq->getName(), candidateAligned); + //divideInHalf(tempSeq, leftQuery, rightQuery); + string queryUnAligned = tempSeq.getUnaligned(); + int numBases = int(queryUnAligned.length() * 0.5); + + string queryAligned = tempSeq.getAligned(); + leftQuery = tempSeq.getAligned(); + rightQuery = tempSeq.getAligned(); + + int baseCount = 0; + int leftSpot = 0; + for (int i = 0; i < queryAligned.length(); i++) { + //if you are a base + if (isalpha(queryAligned[i])) { + baseCount++; + } + + //if you have half + if (baseCount >= numBases) { leftSpot = i; break; } //first half + } + + //blank out right side + for (int i = leftSpot; i < leftQuery.length(); i++) { leftQuery[i] = '.'; } + + //blank out left side + for (int i = 0; i < leftSpot; i++) { rightQuery[i] = '.'; } + + //run chimeraSlayer on each piece + Sequence* left = new Sequence(candidateSeq->getName(), leftQuery); + Sequence* right = new Sequence(candidateSeq->getName(), rightQuery); + + //find chimeras + chimera->getChimeras(left); + data_results leftResults = chimera->getResults(); + + chimera->getChimeras(right); + data_results rightResults = chimera->getResults(); + + //if either piece is chimeric then report + Sequence trimmed = chimera->print(out, out2, leftResults, rightResults); + if (pDataArray->trim) { trimmed.printSequence(out3); } + + delete left; delete right; + + }else { //already chimeric + //print results + Sequence trimmed = chimera->print(out, out2); + if (pDataArray->trim) { trimmed.printSequence(out3); } + } + + + } + count++; + } + + delete candidateSeq; + //report progress + if((count) % 100 == 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); } + } + //report progress + if((count) % 100 != 0){ pDataArray->m->mothurOut("Processing sequence: " + toString(count)); pDataArray->m->mothurOutEndLine(); } + + pDataArray->numNoParents = chimera->getNumNoParents(); + out.close(); + out2.close(); + if (pDataArray->trim) { out3.close(); } + inFASTA.close(); + delete chimera; + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "ChimeraSlayerCommand", "MySlayerThreadFunction"); + exit(1); + } +} +#endif + +/**************************************************************************************************/ + + #endif diff --git a/classify.cpp b/classify.cpp index 4c6c6d8..e31e8cc 100644 --- a/classify.cpp +++ b/classify.cpp @@ -47,7 +47,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); @@ -176,7 +176,7 @@ void Classify::generateDatabaseAndNames(string tfile, string tempFile, string me } } else if(method == "suffix") { database = new SuffixDB(numSeqs); } - else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, ""); } + else if(method == "blast") { database = new BlastDB(tempFile.substr(0,tempFile.find_last_of(".")+1), gapOpen, gapExtend, match, misMatch, "", threadID); } else if(method == "distance") { database = new DistanceDB(); } else { m->mothurOut(method + " is not a valid search option. I will run the command using kmer, ksize=8."); diff --git a/classify.h b/classify.h index e92569d..2e209f5 100644 --- a/classify.h +++ b/classify.h @@ -44,6 +44,7 @@ protected: string taxFile, templateFile, simpleTax; vector names; + int threadID; int readTaxonomy(string); vector parseTax(string); diff --git a/classifyseqscommand.cpp b/classifyseqscommand.cpp index d9bd698..4291132 100644 --- a/classifyseqscommand.cpp +++ b/classifyseqscommand.cpp @@ -475,12 +475,12 @@ int ClassifySeqsCommand::execute(){ try { if (abort == true) { if (calledHelp) { return 0; } return 2; } - if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters); } - else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted); } + if(method == "bayesian"){ classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand()); } + else if(method == "knn"){ classify = new Knn(taxonomyFileName, templateFileName, search, kmerSize, gapOpen, gapExtend, match, misMatch, numWanted, rand()); } else { m->mothurOut(search + " is not a valid method option. I will run the command using bayesian."); m->mothurOutEndLine(); - classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters); + classify = new Bayesian(taxonomyFileName, templateFileName, search, kmerSize, cutoff, iters, rand()); } if (m->control_pressed) { delete classify; return 0; } diff --git a/clustersplitcommand.cpp b/clustersplitcommand.cpp index 1f99efc..dc913d8 100644 --- a/clustersplitcommand.cpp +++ b/clustersplitcommand.cpp @@ -555,17 +555,23 @@ int ClusterSplitCommand::execute(){ MPI_Barrier(MPI_COMM_WORLD); #else - + + //sanity check + if (processors > distName.size()) { processors = distName.size(); } + #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) if(processors == 1){ listFileNames = cluster(distName, labels); //clusters individual files and returns names of list files }else{ + + cout << processors << '\t' << distName.size() << endl; vector < vector < map > > dividedNames; //distNames[1] = vector of filenames for process 1... dividedNames.resize(processors); //for each file group figure out which process will complete it //want to divide the load intelligently so the big files are spread between processes for (int i = 0; i < distName.size(); i++) { + cout << i << endl; int processToAssign = (i+1) % processors; if (processToAssign == 0) { processToAssign = processors; } @@ -574,6 +580,7 @@ int ClusterSplitCommand::execute(){ //not lets reverse the order of ever other process, so we balance big files running with little ones for (int i = 0; i < processors; i++) { + cout << i << endl; int remainder = ((i+1) % processors); if (remainder) { reverse(dividedNames[i].begin(), dividedNames[i].end()); } } diff --git a/collect.cpp b/collect.cpp index 78fd05c..9a89020 100644 --- a/collect.cpp +++ b/collect.cpp @@ -74,10 +74,11 @@ try { vector subset; //create and initialize vector of sharedvectors, one for each group - for (int i = 0; i < m->Groups.size(); i++) { + vector mGroups = m->getGroups(); + for (int i = 0; i < mGroups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(sharedorder->getNumBins()); temp->setLabel(sharedorder->getLabel()); - temp->setGroup(m->Groups[i]); + temp->setGroup(mGroups[i]); lookup.push_back(temp); } @@ -142,14 +143,14 @@ try { if ((w != k) && (w != l)) { subset.push_back(lookup[w]); } } - ccd->updateSharedData(subset, i+1, m->Groups.size(), pair); + ccd->updateSharedData(subset, i+1, m->getNumGroups(), pair); } n++; } //if this is a calculator that can do multiples then do them pair = false; - ccd->updateSharedData(lookup, i+1, m->Groups.size(), pair); + ccd->updateSharedData(lookup, i+1, m->getNumGroups(), pair); } totalNumSeq = i+1; @@ -171,13 +172,13 @@ try { if ((w != k) && (w != l)) { subset.push_back(lookup[w]); } } - ccd->updateSharedData(subset, totalNumSeq, m->Groups.size(), pair); + ccd->updateSharedData(subset, totalNumSeq, m->getNumGroups(), pair); } n++; } //if this is a calculator that can do multiples then do them pair = false; - ccd->updateSharedData(lookup, totalNumSeq, m->Groups.size(), pair); + ccd->updateSharedData(lookup, totalNumSeq, m->getNumGroups(), pair); } //resets output files @@ -208,9 +209,10 @@ void Collect::getGroupComb() { numGroupComb = 0; int n = 1; - for (int i = 0; i < (m->Groups.size() - 1); i++) { - for (int l = n; l < m->Groups.size(); l++) { - group = m->Groups[i] + m->Groups[l]; + vector mGroups = m->getGroups(); + for (int i = 0; i < (m->getNumGroups() - 1); i++) { + for (int l = n; l < m->getNumGroups(); l++) { + group = mGroups[i] + mGroups[l]; groupComb.push_back(group); numGroupComb++; } diff --git a/collectcommand.cpp b/collectcommand.cpp index 89ab0a4..a65b417 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -307,7 +307,7 @@ int CollectCommand::execute(){ for (int p = 0; p < inputFileNames.size(); p++) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if (outputDir == "") { outputDir += m->hasPath(inputFileNames[p]); } string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); @@ -416,7 +416,7 @@ int CollectCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete input; delete order; - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -428,7 +428,7 @@ int CollectCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete input; delete order; - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -476,7 +476,7 @@ int CollectCommand::execute(){ for(int i=0;imothurRemove(outputNames[i]); } outputTypes.clear(); delete input; - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -509,7 +509,7 @@ int CollectCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete input; delete order; - m->Groups.clear(); + m->clearGroups(); return 0; } delete order; diff --git a/collectdisplay.h b/collectdisplay.h index f7c8a48..781cea1 100644 --- a/collectdisplay.h +++ b/collectdisplay.h @@ -31,9 +31,10 @@ public: //because we randomizes the order we need to put the results in the correct column in the output file int group1Index, group2Index, pos; - for (int i = 0; i < m->Groups.size(); i++) { - if (shared[0]->getGroup() == m->Groups[i]) { group1Index = i; } - if (shared[1]->getGroup() == m->Groups[i]) { group2Index = i; } + vector mGroups = m->getGroups(); + for (int i = 0; i < mGroups.size(); i++) { + if (shared[0]->getGroup() == mGroups[i]) { group1Index = i; } + if (shared[1]->getGroup() == mGroups[i]) { group2Index = i; } } numGroupComb = 0; diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index 3b43a40..a5721b0 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -270,7 +270,7 @@ CollectSharedCommand::CollectSharedCommand(string option) { else { m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } @@ -440,7 +440,11 @@ int CollectSharedCommand::execute(){ //set users groups SharedUtil* util = new SharedUtil(); - util->setGroups(m->Groups, m->namesOfGroups, "collect"); + Groups = m->getGroups(); + vector allGroups = m->getAllGroups(); + util->setGroups(Groups, allGroups, "collect"); + m->setGroups(Groups); + m->setAllGroups(allGroups); delete util; while((order != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { @@ -448,7 +452,7 @@ int CollectSharedCommand::execute(){ for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -495,7 +499,7 @@ int CollectSharedCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for(int i=0;iGroups.clear(); + m->clearGroups(); delete input; return 0; } @@ -528,7 +532,7 @@ int CollectSharedCommand::execute(){ for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -538,7 +542,7 @@ int CollectSharedCommand::execute(){ for(int i=0;iGroups.clear(); + m->clearGroups(); delete input; m->mothurOutEndLine(); diff --git a/corraxescommand.cpp b/corraxescommand.cpp index ea98dfb..f10fe4d 100644 --- a/corraxescommand.cpp +++ b/corraxescommand.cpp @@ -158,7 +158,7 @@ CorrAxesCommand::CorrAxesCommand(string option) { pickedGroups = true; m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(inputFileName); } @@ -907,12 +907,13 @@ int CorrAxesCommand::getMetadata(){ //remove any groups the user does not want, and set globaldata->groups with only valid groups SharedUtil* util; util = new SharedUtil(); - - util->setGroups(m->Groups, groupNames); + Groups = m->getGroups(); + util->setGroups(Groups, groupNames); + m->setGroups(Groups); for (int i = 0; i < lookupFloat.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. - if (util->isValidGroup(lookupFloat[i]->getGroup(), m->Groups) == false) { + if (util->isValidGroup(lookupFloat[i]->getGroup(), m->getGroups()) == false) { delete lookupFloat[i]; lookupFloat[i] = NULL; lookupFloat.erase(lookupFloat.begin()+i); i--; diff --git a/countgroupscommand.cpp b/countgroupscommand.cpp index af9993c..24b4214 100644 --- a/countgroupscommand.cpp +++ b/countgroupscommand.cpp @@ -127,7 +127,7 @@ CountGroupsCommand::CountGroupsCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } sharedfile = validParameter.validFile(parameters, "shared", true); @@ -153,7 +153,7 @@ CountGroupsCommand::CountGroupsCommand(string option) { } } - if ((accnosfile == "") && (Groups.size() == 0)) { Groups.push_back("all"); m->Groups = Groups; } + if ((accnosfile == "") && (Groups.size() == 0)) { Groups.push_back("all"); m->setGroups(Groups); } } } @@ -179,7 +179,8 @@ int CountGroupsCommand::execute(){ //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil util; - util.setGroups(Groups, groupMap.namesOfGroups); + vector nameGroups = groupMap.getNamesOfGroups(); + util.setGroups(Groups, nameGroups); for (int i = 0; i < Groups.size(); i++) { m->mothurOut(Groups[i] + " contains " + toString(groupMap.getNumSeqs(Groups[i])) + "."); m->mothurOutEndLine(); @@ -224,7 +225,7 @@ void CountGroupsCommand::readAccnos(){ } in.close(); - m->Groups = Groups; + m->setGroups(Groups); } catch(exception& e) { diff --git a/countseqscommand.cpp b/countseqscommand.cpp index 0d8ddf5..9cdd033 100644 --- a/countseqscommand.cpp +++ b/countseqscommand.cpp @@ -157,7 +157,8 @@ int CountSeqsCommand::execute(){ //make sure groups are valid. takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector nameGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, nameGroups); delete util; //sort groupNames so that the group title match the counts below, this is needed because the map object automatically sorts diff --git a/deuniquetreecommand.cpp b/deuniquetreecommand.cpp index dca81e5..64ea9b7 100644 --- a/deuniquetreecommand.cpp +++ b/deuniquetreecommand.cpp @@ -104,8 +104,8 @@ DeuniqueTreeCommand::DeuniqueTreeCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -182,7 +182,7 @@ int DeuniqueTreeCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } diff --git a/engine.cpp b/engine.cpp index af88e4f..a80eba4 100644 --- a/engine.cpp +++ b/engine.cpp @@ -180,8 +180,8 @@ bool InteractEngine::getInput(){ #endif //executes valid command mout->runParse = true; - mout->Groups.clear(); - mout->namesOfGroups.clear(); + mout->clearGroups(); + mout->clearAllGroups(); mout->Treenames.clear(); mout->names.clear(); mout->saveNextLabel = ""; @@ -364,8 +364,8 @@ bool BatchEngine::getInput(){ #endif //executes valid command mout->runParse = true; - mout->Groups.clear(); - mout->namesOfGroups.clear(); + mout->clearGroups(); + mout->clearAllGroups(); mout->Treenames.clear(); mout->names.clear(); mout->saveNextLabel = ""; @@ -531,8 +531,8 @@ bool ScriptEngine::getInput(){ #endif //executes valid command mout->runParse = true; - mout->Groups.clear(); - mout->namesOfGroups.clear(); + mout->clearGroups(); + mout->clearAllGroups(); mout->Treenames.clear(); mout->names.clear(); mout->saveNextLabel = ""; diff --git a/getgroupscommand.cpp b/getgroupscommand.cpp index d3470f1..5021f40 100644 --- a/getgroupscommand.cpp +++ b/getgroupscommand.cpp @@ -203,7 +203,7 @@ GetGroupsCommand::GetGroupsCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } sharedfile = validParameter.validFile(parameters, "shared", true); @@ -273,7 +273,9 @@ int GetGroupsCommand::execute(){ //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector gNamesOfGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, gNamesOfGroups); + groupMap->setNamesOfGroups(gNamesOfGroups); delete util; //fill names with names of sequences that are from the groups we want to remove @@ -711,7 +713,7 @@ void GetGroupsCommand::readAccnos(){ } in.close(); - m->Groups = Groups; + m->setGroups(Groups); } catch(exception& e) { diff --git a/getoturepcommand.cpp b/getoturepcommand.cpp index 9ed887a..378e918 100644 --- a/getoturepcommand.cpp +++ b/getoturepcommand.cpp @@ -290,7 +290,7 @@ GetOTURepCommand::GetOTURepCommand(string option) { m->splitAtDash(groups, Groups); } } - m->Groups = Groups; + m->setGroups(Groups); string temp = validParameter.validFile(parameters, "large", false); if (temp == "not found") { temp = "F"; } large = m->isTrue(temp); @@ -429,7 +429,9 @@ int GetOTURepCommand::execute(){ if (Groups.size() != 0) { SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups, "getoturep"); + vector gNamesOfGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, gNamesOfGroups, "getoturep"); + groupMap->setNamesOfGroups(gNamesOfGroups); delete util; } } diff --git a/getotuscommand.cpp b/getotuscommand.cpp index b94b42a..4e8d319 100644 --- a/getotuscommand.cpp +++ b/getotuscommand.cpp @@ -187,7 +187,9 @@ int GetOtusCommand::execute(){ //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector gNamesOfGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, gNamesOfGroups); + groupMap->setNamesOfGroups(gNamesOfGroups); delete util; if (m->control_pressed) { delete groupMap; return 0; } diff --git a/getrelabundcommand.cpp b/getrelabundcommand.cpp index 26d7958..8c8d8e1 100644 --- a/getrelabundcommand.cpp +++ b/getrelabundcommand.cpp @@ -131,7 +131,7 @@ GetRelAbundCommand::GetRelAbundCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } scale = validParameter.validFile(parameters, "scale", false); if (scale == "not found") { scale = "totalgroup"; } @@ -170,7 +170,7 @@ int GetRelAbundCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -202,13 +202,13 @@ int GetRelAbundCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; out.close(); m->mothurRemove(outputFileName); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -236,7 +236,7 @@ int GetRelAbundCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; out.close(); diff --git a/getsharedotucommand.cpp b/getsharedotucommand.cpp index 0246136..2312649 100644 --- a/getsharedotucommand.cpp +++ b/getsharedotucommand.cpp @@ -182,7 +182,7 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option) { else { userGroups = "unique." + groups; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } @@ -191,7 +191,7 @@ GetSharedOTUCommand::GetSharedOTUCommand(string option) { else { userGroups = groups; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); unique = false; } @@ -221,7 +221,7 @@ int GetSharedOTUCommand::execute(){ if (m->control_pressed) { delete groupMap; return 0; } if (Groups.size() == 0) { - Groups = groupMap->namesOfGroups; + Groups = groupMap->getNamesOfGroups(); //make string for outputfile name userGroups = "unique."; @@ -230,7 +230,9 @@ int GetSharedOTUCommand::execute(){ }else{ //sanity check for group names SharedUtil util; - util.setGroups(Groups, groupMap->namesOfGroups); + vector namesOfGroups = groupMap->getNamesOfGroups(); + util.setGroups(Groups, namesOfGroups); + groupMap->setNamesOfGroups(namesOfGroups); } //put groups in map to find easier @@ -325,7 +327,7 @@ int GetSharedOTUCommand::execute(){ //reset groups parameter - m->Groups.clear(); + m->clearGroups(); if (lastlist != NULL) { delete lastlist; } diff --git a/groupmap.cpp b/groupmap.cpp index 58e04c5..481fd1d 100644 --- a/groupmap.cpp +++ b/groupmap.cpp @@ -44,7 +44,7 @@ int GroupMap::readMap() { m->gobble(fileHandle); } fileHandle.close(); - m->namesOfGroups = namesOfGroups; + m->setAllGroups(namesOfGroups); return error; } /************************************************************/ @@ -70,7 +70,7 @@ int GroupMap::readDesignMap() { m->gobble(fileHandle); } fileHandle.close(); - m->namesOfGroups = namesOfGroups; + m->setAllGroups(namesOfGroups); return error; } @@ -91,7 +91,15 @@ string GroupMap::getGroup(string sequenceName) { /************************************************************/ void GroupMap::setGroup(string sequenceName, string groupN) { - groupmap[sequenceName] = groupN; + setNamesOfGroups(groupN); + + it = groupmap.find(sequenceName); + + if (it != groupmap.end()) { m->mothurOut("Your groupfile contains more than 1 sequence named " + sequenceName + ", sequence names must be unique. Please correct."); m->mothurOutEndLine(); } + else { + groupmap[sequenceName] = groupN; //store data in map + seqsPerGroup[groupN]++; //increment number of seqs in that group + } } /************************************************************/ diff --git a/groupmap.h b/groupmap.h index 54085a1..99891a4 100644 --- a/groupmap.h +++ b/groupmap.h @@ -26,7 +26,13 @@ public: bool isValidGroup(string); //return true if string is a valid group string getGroup(string); void setGroup(string, string); - vector namesOfGroups; + vector getNamesOfGroups() { + sort(namesOfGroups.begin(), namesOfGroups.end()); + groupIndex.clear(); + for (int i = 0; i < namesOfGroups.size(); i++) { groupIndex[namesOfGroups[i]] = i; } + return namesOfGroups; + } + void setNamesOfGroups(vector sn) { namesOfGroups = sn; } map groupIndex; //groupname, vectorIndex in namesOfGroups. - used by collectdisplays and libshuff commands. int getNumSeqs() { return groupmap.size(); } vector getNamesSeqs(); @@ -34,6 +40,7 @@ public: int getNumSeqs(string); //return the number of seqs in a given group private: + vector namesOfGroups; MothurOut* m; ifstream fileHandle; string groupFileName; diff --git a/heatmapcommand.cpp b/heatmapcommand.cpp index 9051a5d..57c4c86 100644 --- a/heatmapcommand.cpp +++ b/heatmapcommand.cpp @@ -222,7 +222,7 @@ HeatMapCommand::HeatMapCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp = validParameter.validFile(parameters, "numotu", false); if (temp == "not found") { temp = "0"; } @@ -287,7 +287,7 @@ int HeatMapCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { if (outputNames[i] != "control") { m->mothurRemove(outputNames[i]); } } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); delete input; delete heatmap; return 0; } @@ -330,7 +330,7 @@ int HeatMapCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { if (outputNames[i] != "control") { m->mothurRemove(outputNames[i]); } } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); delete input; delete heatmap; return 0; } @@ -359,7 +359,7 @@ int HeatMapCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); }else if ((format == "list") || (format == "rabund") || (format == "sabund")) { @@ -440,7 +440,7 @@ int HeatMapCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } for (int i = 0; i < outputNames.size(); i++) { if (outputNames[i] != "control") { m->mothurRemove(outputNames[i]); } } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); delete input; delete heatmap; return 0; } @@ -482,7 +482,7 @@ int HeatMapCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { if (outputNames[i] != "control") { m->mothurRemove(outputNames[i]); } } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); delete input; delete heatmap; return 0; } @@ -511,7 +511,7 @@ int HeatMapCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); } diff --git a/heatmapsim.cpp b/heatmapsim.cpp index 9c5e780..18043c9 100644 --- a/heatmapsim.cpp +++ b/heatmapsim.cpp @@ -69,7 +69,7 @@ vector HeatMapSim::getPic(vector lookup, vectorgetValues(subset); - sims.push_back(data[0]); + sims.push_back(1.0 - data[0]); //save biggest similairity to set relative sim // if (data[0] > biggest) { biggest = data[0]; } diff --git a/heatmapsimcommand.cpp b/heatmapsimcommand.cpp index e791f0c..1b6fc5b 100644 --- a/heatmapsimcommand.cpp +++ b/heatmapsimcommand.cpp @@ -234,7 +234,7 @@ HeatMapSimCommand::HeatMapSimCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } @@ -325,12 +325,12 @@ int HeatMapSimCommand::runCommandShared() { set processedLabels; set userLabels = labels; - if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -370,7 +370,7 @@ int HeatMapSimCommand::runCommandShared() { } - if (m->control_pressed) { delete input; m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -385,7 +385,7 @@ int HeatMapSimCommand::runCommandShared() { } } - if (m->control_pressed) { delete input; m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; m->clearGroups(); return 0; } //run last label if you need to if (needToRun == true) { @@ -399,10 +399,10 @@ int HeatMapSimCommand::runCommandShared() { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - if (m->control_pressed) { delete input; m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; m->clearGroups(); return 0; } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; diff --git a/indicatorcommand.cpp b/indicatorcommand.cpp index 797f62c..40d3bcd 100644 --- a/indicatorcommand.cpp +++ b/indicatorcommand.cpp @@ -96,8 +96,8 @@ IndicatorCommand::IndicatorCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -169,7 +169,7 @@ IndicatorCommand::IndicatorCommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; Groups.push_back("all"); } else { m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); label = validParameter.validFile(parameters, "label", false); if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; } @@ -237,11 +237,14 @@ int IndicatorCommand::execute(){ //fill Groups - checks for "all" and for any typo groups SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, designMap->namesOfGroups); + vector nameGroups = designMap->getNamesOfGroups(); + util->setGroups(Groups, nameGroups); + designMap->setNamesOfGroups(nameGroups); delete util; //loop through the Groups and fill Globaldata's Groups with the design file info - m->Groups = designMap->getNamesSeqs(Groups); + vector namesSeqs = designMap->getNamesSeqs(Groups); + m->setGroups(namesSeqs); } /***************************************************/ @@ -258,7 +261,7 @@ int IndicatorCommand::execute(){ } //reset groups if needed - if (designfile != "") { m->Groups = Groups; } + if (designfile != "") { m->setGroups(Groups); } /***************************************************/ // reading tree info // @@ -273,7 +276,7 @@ int IndicatorCommand::execute(){ for (int i = 0; i < m->Treenames.size(); i++) { //sanity check - is this a group that is not in the sharedfile? if (designfile == "") { - if (!(m->inUsersGroups(m->Treenames[i], m->namesOfGroups))) { + if (!(m->inUsersGroups(m->Treenames[i], m->getAllGroups()))) { m->mothurOut("[ERROR]: " + m->Treenames[i] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); mismatch = true; } @@ -283,7 +286,7 @@ int IndicatorCommand::execute(){ vector myNames = designMap->getNamesSeqs(myGroups); for(int k = 0; k < myNames.size(); k++) { - if (!(m->inUsersGroups(myNames[k], m->namesOfGroups))) { + if (!(m->inUsersGroups(myNames[k], m->getAllGroups()))) { m->mothurOut("[ERROR]: " + myNames[k] + " is not a group in your shared or relabund file."); m->mothurOutEndLine(); mismatch = true; } @@ -323,9 +326,9 @@ int IndicatorCommand::execute(){ /***************************************************/ // create ouptut tree - respecting pickedGroups // /***************************************************/ - Tree* outputTree = new Tree(m->Groups.size(), treeMap); + Tree* outputTree = new Tree(m->getNumGroups(), treeMap); - outputTree->getSubTree(T[0], m->Groups); + outputTree->getSubTree(T[0], m->getGroups()); outputTree->assembleTree(); //no longer need original tree, we have output tree to use and label @@ -413,11 +416,11 @@ int IndicatorCommand::GetIndicatorSpecies(){ vector subset; //for each grouping - for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) { for (int k = 0; k < lookup.size(); k++) { //are you from this grouping? - if (designMap->getGroup(lookup[k]->getGroup()) == designMap->namesOfGroups[i]) { + if (designMap->getGroup(lookup[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) { subset.push_back(lookup[k]); groupsAlreadyAdded.insert(lookup[k]->getGroup()); } @@ -437,10 +440,10 @@ int IndicatorCommand::GetIndicatorSpecies(){ vector subset; //for each grouping - for (int i = 0; i < designMap->namesOfGroups.size(); i++) { + for (int i = 0; i < (designMap->getNamesOfGroups()).size(); i++) { for (int k = 0; k < lookupFloat.size(); k++) { //are you from this grouping? - if (designMap->getGroup(lookupFloat[k]->getGroup()) == designMap->namesOfGroups[i]) { + if (designMap->getGroup(lookupFloat[k]->getGroup()) == (designMap->getNamesOfGroups())[i]) { subset.push_back(lookupFloat[k]); groupsAlreadyAdded.insert(lookupFloat[k]->getGroup()); } diff --git a/knn.cpp b/knn.cpp index 1835c48..6053b6e 100644 --- a/knn.cpp +++ b/knn.cpp @@ -10,9 +10,11 @@ #include "knn.h" /**************************************************************************************************/ -Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n) +Knn::Knn(string tfile, string tempFile, string method, int kmerSize, float gapOpen, float gapExtend, float match, float misMatch, int n, int tid) : Classify(), num(n), search(method) { try { + threadID = tid; + //create search database and names vector generateDatabaseAndNames(tfile, tempFile, method, kmerSize, gapOpen, gapExtend, match, misMatch); } diff --git a/knn.h b/knn.h index 1965382..c544235 100644 --- a/knn.h +++ b/knn.h @@ -18,7 +18,7 @@ class Knn : public Classify { public: - Knn(string, string, string, int, float, float, float, float, int); + Knn(string, string, string, int, float, float, float, float, int, int); ~Knn(); void setDistName(string s); diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index 96abbbe..1e8102f 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -165,7 +165,7 @@ LibShuffCommand::LibShuffCommand(string option) { else { savegroups = groups; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp; @@ -252,9 +252,9 @@ int LibShuffCommand::execute(){ //this is needed because when we read the matrix we sort it into groups in alphabetical order //the rest of the command and the classes used in this command assume specific order /********************************************************************************************/ - matrix->setGroups(groupMap->namesOfGroups); + matrix->setGroups(groupMap->getNamesOfGroups()); vector sizes; - for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { sizes.push_back(groupMap->getNumSeqs(groupMap->namesOfGroups[i])); } + for (int i = 0; i < (groupMap->getNamesOfGroups()).size(); i++) { sizes.push_back(groupMap->getNumSeqs((groupMap->getNamesOfGroups())[i])); } matrix->setSizes(sizes); @@ -268,21 +268,21 @@ int LibShuffCommand::execute(){ savedDXYValues = form->evaluateAll(); savedMinValues = form->getSavedMins(); - if (m->control_pressed) { delete form; m->Groups.clear(); delete matrix; delete groupMap; return 0; } + if (m->control_pressed) { delete form; m->clearGroups(); delete matrix; delete groupMap; return 0; } pValueCounts.resize(numGroups); for(int i=0;icontrol_pressed) { outputTypes.clear(); delete form; m->Groups.clear(); delete matrix; delete groupMap; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete form; m->clearGroups(); delete matrix; delete groupMap; return 0; } Progress* reading = new Progress(); for(int i=0;icontrol_pressed) { outputTypes.clear(); delete form; m->Groups.clear(); delete matrix; delete groupMap; delete reading; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete form; m->clearGroups(); delete matrix; delete groupMap; delete reading; return 0; } reading->newLine(groupNames[i]+'-'+groupNames[j], iters); int spoti = groupMap->groupIndex[groupNames[i]]; //neccessary in case user selects groups so you know where they are in the matrix @@ -290,13 +290,13 @@ int LibShuffCommand::execute(){ for(int p=0;pcontrol_pressed) { outputTypes.clear(); delete form; m->Groups.clear(); delete matrix; delete groupMap; delete reading; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete form; m->clearGroups(); delete matrix; delete groupMap; delete reading; return 0; } form->randomizeGroups(spoti,spotj); if(form->evaluatePair(spoti,spotj) >= savedDXYValues[spoti][spotj]) { pValueCounts[i][j]++; } if(form->evaluatePair(spotj,spoti) >= savedDXYValues[spotj][spoti]) { pValueCounts[j][i]++; } - if (m->control_pressed) { outputTypes.clear(); delete form; m->Groups.clear(); delete matrix; delete groupMap; delete reading; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete form; m->clearGroups(); delete matrix; delete groupMap; delete reading; return 0; } reading->update(p); } @@ -305,7 +305,7 @@ int LibShuffCommand::execute(){ } } - if (m->control_pressed) { outputTypes.clear(); delete form; m->Groups.clear(); delete matrix; delete groupMap; delete reading; return 0; } + if (m->control_pressed) { outputTypes.clear(); delete form; m->clearGroups(); delete matrix; delete groupMap; delete reading; return 0; } reading->finish(); delete reading; @@ -315,7 +315,7 @@ int LibShuffCommand::execute(){ printCoverageFile(); //clear out users groups - m->Groups.clear(); + m->clearGroups(); delete form; delete matrix; delete groupMap; @@ -492,49 +492,51 @@ int LibShuffCommand::printSummaryFile() { void LibShuffCommand::setGroups() { try { + vector myGroups = m->getGroups(); //if the user has not entered specific groups to analyze then do them all - if (m->Groups.size() == 0) { + if (m->getNumGroups() == 0) { numGroups = groupMap->getNumGroups(); for (int i=0; i < numGroups; i++) { - m->Groups.push_back(groupMap->namesOfGroups[i]); + myGroups.push_back((groupMap->getNamesOfGroups())[i]); } } else { if (savegroups != "all") { //check that groups are valid - for (int i = 0; i < m->Groups.size(); i++) { - if (groupMap->isValidGroup(m->Groups[i]) != true) { - m->mothurOut(m->Groups[i] + " is not a valid group, and will be disregarded."); m->mothurOutEndLine(); + for (int i = 0; i < myGroups.size(); i++) { + if (groupMap->isValidGroup(myGroups[i]) != true) { + m->mothurOut(myGroups[i] + " is not a valid group, and will be disregarded."); m->mothurOutEndLine(); // erase the invalid group from globaldata->Groups - m->Groups.erase(m->Groups.begin()+i); + myGroups.erase(myGroups.begin()+i); } } //if the user only entered invalid groups - if ((m->Groups.size() == 0) || (m->Groups.size() == 1)) { + if ((myGroups.size() == 0) || (myGroups.size() == 1)) { numGroups = groupMap->getNumGroups(); for (int i=0; i < numGroups; i++) { - m->Groups.push_back(groupMap->namesOfGroups[i]); + myGroups.push_back((groupMap->getNamesOfGroups())[i]); } m->mothurOut("When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile."); m->mothurOutEndLine(); - } else { numGroups = m->Groups.size(); } + } else { numGroups = myGroups.size(); } } else { //users wants all groups numGroups = groupMap->getNumGroups(); - m->Groups.clear(); + myGroups.clear(); for (int i=0; i < numGroups; i++) { - m->Groups.push_back(groupMap->namesOfGroups[i]); + myGroups.push_back((groupMap->getNamesOfGroups())[i]); } } } //sort so labels match - sort(m->Groups.begin(), m->Groups.end()); + sort(myGroups.begin(), myGroups.end()); //sort - sort(groupMap->namesOfGroups.begin(), groupMap->namesOfGroups.end()); + //sort(groupMap->namesOfGroups.begin(), groupMap->namesOfGroups.end()); - for (int i = 0; i < groupMap->namesOfGroups.size(); i++) { groupMap->groupIndex[groupMap->namesOfGroups[i]] = i; } + for (int i = 0; i < (groupMap->getNamesOfGroups()).size(); i++) { groupMap->groupIndex[(groupMap->getNamesOfGroups())[i]] = i; } - groupNames = m->Groups; + groupNames = myGroups; + m->setGroups(myGroups); } catch(exception& e) { diff --git a/matrixoutputcommand.cpp b/matrixoutputcommand.cpp index 35a4553..f5737c7 100644 --- a/matrixoutputcommand.cpp +++ b/matrixoutputcommand.cpp @@ -181,7 +181,7 @@ MatrixOutputCommand::MatrixOutputCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } @@ -330,12 +330,12 @@ int MatrixOutputCommand::execute(){ lines[i].end = int (sqrt(float(i+1)/float(processors)) * numGroups); } - if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); return 0; } + if (m->control_pressed) { delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine(); @@ -368,7 +368,7 @@ int MatrixOutputCommand::execute(){ lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -383,7 +383,7 @@ int MatrixOutputCommand::execute(){ } } - if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } //run last label if you need to if (needToRun == true) { @@ -395,10 +395,10 @@ int MatrixOutputCommand::execute(){ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } } - if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); //set phylip file as new current phylipfile string current = ""; diff --git a/mergegroupscommand.cpp b/mergegroupscommand.cpp index 8df6b62..af2af96 100644 --- a/mergegroupscommand.cpp +++ b/mergegroupscommand.cpp @@ -150,7 +150,7 @@ MergeGroupsCommand::MergeGroupsCommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = "all"; } m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } } @@ -187,7 +187,7 @@ int MergeGroupsCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { out.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { out.close(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -221,13 +221,13 @@ int MergeGroupsCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { out.close(); m->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { out.close(); m->clearGroups(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //get next line to process lookup = input.getSharedRAbundVectors(); } - if (m->control_pressed) { out.close(); m->Groups.clear(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { out.close(); m->clearGroups(); delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -257,7 +257,7 @@ int MergeGroupsCommand::execute(){ out.close(); //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete designMap; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0;} diff --git a/metastatscommand.cpp b/metastatscommand.cpp index 1aaa41e..b493c96 100644 --- a/metastatscommand.cpp +++ b/metastatscommand.cpp @@ -167,7 +167,7 @@ MetaStatsCommand::MetaStatsCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } sets = validParameter.validFile(parameters, "sets", false); @@ -215,8 +215,9 @@ int MetaStatsCommand::execute(){ //setup the pairwise comparions of sets for metastats //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; //make sure sets are all in designMap - SharedUtil* util = new SharedUtil(); - util->setGroups(Sets, designMap->namesOfGroups); + SharedUtil* util = new SharedUtil(); + vector dGroups = designMap->getNamesOfGroups(); + util->setGroups(Sets, dGroups); delete util; int numGroups = Sets.size(); @@ -250,7 +251,7 @@ int MetaStatsCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -281,13 +282,13 @@ int MetaStatsCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { outputTypes.clear(); m->Groups.clear(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } + if (m->control_pressed) { outputTypes.clear(); m->clearGroups(); delete input; delete designMap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } //output error messages about any remaining user labels set::iterator it; @@ -315,7 +316,7 @@ int MetaStatsCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; delete designMap; diff --git a/mothurout.h b/mothurout.h index 47e5ca4..6f04f89 100644 --- a/mothurout.h +++ b/mothurout.h @@ -36,10 +36,20 @@ class MothurOut { void setReleaseDate(string r) { releaseDate = r; } string getVersion() { return version; } void setVersion(string r) { version = r; } - vector Groups; + + void addGroup(string g) { Groups.push_back(g); } + void setGroups(vector& g) { sort(g.begin(), g.end()); Groups = g; } + void clearGroups() { Groups.clear(); } + int getNumGroups() { return Groups.size(); } + vector getGroups() { sort(Groups.begin(), Groups.end()); return Groups; } + void addAllGroup(string g) { namesOfGroups.push_back(g); } + void setAllGroups(vector& g) { sort(g.begin(), g.end()); namesOfGroups = g; } + void clearAllGroups() { namesOfGroups.clear(); } + int getNumAllGroups() { return namesOfGroups.size(); } + + vector getAllGroups() { sort(namesOfGroups.begin(), namesOfGroups.end()); return namesOfGroups; } vector Treenames; map names; - vector namesOfGroups; vector binLabelsInFile; vector currentBinLabels; string saveNextLabel, argv, sharedHeaderMode; @@ -195,7 +205,8 @@ class MothurOut { string accnosfile, phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, designfile, taxonomyfile; string orderfile, treefile, sharedfile, ordergroupfile, relabundfile, fastafile, qualfile, sfffile, oligosfile, processors, flowfile; - + vector Groups; + vector namesOfGroups; ofstream out; int mem_usage(double&, double&); diff --git a/normalizesharedcommand.cpp b/normalizesharedcommand.cpp index 51c755c..bcec00a 100644 --- a/normalizesharedcommand.cpp +++ b/normalizesharedcommand.cpp @@ -166,7 +166,7 @@ NormalizeSharedCommand::NormalizeSharedCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } method = validParameter.validFile(parameters, "method", false); if (method == "not found") { method = "totalgroup"; } @@ -206,18 +206,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookup.size(); i++) { if (lookup[i]->getNumSeqs() < norm) { m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookup[i]; }else { - m->Groups.push_back(lookup[i]->getGroup()); + mGroups.push_back(lookup[i]->getGroup()); temp.push_back(lookup[i]); } } lookup = temp; + m->setGroups(mGroups); } //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label. @@ -240,7 +242,7 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){ @@ -271,13 +273,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookup = input->getSharedRAbundVectors(); } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -314,18 +316,20 @@ int NormalizeSharedCommand::execute(){ //look for groups whose numseqs is below norm and remove them, warning the user if (norm != 0) { - m->Groups.clear(); + m->clearGroups(); + vector mGroups; vector temp; for (int i = 0; i < lookupFloat.size(); i++) { if (lookupFloat[i]->getNumSeqs() < norm) { m->mothurOut(lookupFloat[i]->getGroup() + " contains " + toString(lookupFloat[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookupFloat[i]; }else { - m->Groups.push_back(lookupFloat[i]->getGroup()); + mGroups.push_back(lookupFloat[i]->getGroup()); temp.push_back(lookupFloat[i]); } } lookupFloat = temp; + m->setGroups(mGroups); } //set norm to smallest group number @@ -343,7 +347,7 @@ int NormalizeSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookupFloat[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; } m->clearGroups(); return 0; } if(allLines == 1 || labels.count(lookupFloat[0]->getLabel()) == 1){ @@ -376,13 +380,13 @@ int NormalizeSharedCommand::execute(){ //prevent memory leak for (int i = 0; i < lookupFloat.size(); i++) { delete lookupFloat[i]; lookupFloat[i] = NULL; } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //get next line to process lookupFloat = input->getSharedRAbundFloatVectors(); } - if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->Groups.clear(); return 0; } + if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); m->clearGroups(); return 0; } //output error messages about any remaining user labels set::iterator it; @@ -411,7 +415,7 @@ int NormalizeSharedCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete input; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); return 0;} diff --git a/parselistscommand.cpp b/parselistscommand.cpp index 9459294..bb096d8 100644 --- a/parselistscommand.cpp +++ b/parselistscommand.cpp @@ -175,11 +175,12 @@ int ParseListCommand::execute(){ //fill filehandles with neccessary ofstreams int i; ofstream* temp; - for (i=0; inamesOfGroups.size(); i++) { + vector gGroups = groupMap->getNamesOfGroups(); + for (i=0; inamesOfGroups[i]] = temp; + filehandles[gGroups[i]] = temp; - string filename = fileroot + groupMap->namesOfGroups[i] + ".list"; + string filename = fileroot + gGroups[i] + ".list"; outputNames.push_back(filename); outputTypes["list"].push_back(filename); m->openOutputFile(filename, *temp); } @@ -194,7 +195,8 @@ int ParseListCommand::execute(){ if (m->control_pressed) { delete input; delete list; delete groupMap; - for (i=0; inamesOfGroups.size(); i++) { (*(filehandles[groupMap->namesOfGroups[i]])).close(); delete filehandles[groupMap->namesOfGroups[i]]; } + vector gGroups = groupMap->getNamesOfGroups(); + for (i=0; imothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } @@ -203,7 +205,7 @@ int ParseListCommand::execute(){ if (m->control_pressed) { delete input; delete list; delete groupMap; - for (i=0; inamesOfGroups.size(); i++) { (*(filehandles[groupMap->namesOfGroups[i]])).close(); delete filehandles[groupMap->namesOfGroups[i]]; } + for (i=0; imothurRemove(outputNames[i]); } outputTypes.clear(); return 0; } @@ -241,10 +243,10 @@ int ParseListCommand::execute(){ } if (m->control_pressed) { - delete input; delete groupMap; - for (i=0; inamesOfGroups.size(); i++) { (*(filehandles[groupMap->namesOfGroups[i]])).close(); delete filehandles[groupMap->namesOfGroups[i]]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - return 0; + delete input; delete groupMap; + for (i=0; imothurRemove(outputNames[i]); } outputTypes.clear(); + return 0; } //output error messages about any remaining user labels @@ -262,10 +264,10 @@ int ParseListCommand::execute(){ } if (m->control_pressed) { - delete input; delete groupMap; - for (i=0; inamesOfGroups.size(); i++) { (*(filehandles[groupMap->namesOfGroups[i]])).close(); delete filehandles[groupMap->namesOfGroups[i]]; } - for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - return 0; + delete input; delete groupMap; + for (i=0; imothurRemove(outputNames[i]); } outputTypes.clear(); + return 0; } //run last label if you need to diff --git a/parsimony.cpp b/parsimony.cpp index 6ec6c57..49a7350 100644 --- a/parsimony.cpp +++ b/parsimony.cpp @@ -17,15 +17,16 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { outputDir = o; //if the users enters no groups then give them the score of all groups - int numGroups = m->Groups.size(); + vector mGroups = m->getGroups(); + int numGroups = mGroups.size(); //calculate number of comparsions int numComp = 0; vector< vector > namesOfGroupCombos; for (int r=0; r groups; groups.push_back(m->Groups[r]); groups.push_back(m->Groups[l]); + vector groups; groups.push_back(mGroups[r]); groups.push_back(mGroups[l]); //cout << globaldata->Groups[r] << '\t' << globaldata->Groups[l] << endl; namesOfGroupCombos.push_back(groups); } @@ -36,16 +37,17 @@ EstOutput Parsimony::getValues(Tree* t, int p, string o) { vector groups; if (numGroups == 0) { //get score for all users groups - for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - if (tmap->namesOfGroups[i] != "xxx") { - groups.push_back(tmap->namesOfGroups[i]); + vector tGroups = tmap->getNamesOfGroups(); + for (int i = 0; i < tGroups.size(); i++) { + if (tGroups[i] != "xxx") { + groups.push_back(tGroups[i]); //cout << tmap->namesOfGroups[i] << endl; } } namesOfGroupCombos.push_back(groups); }else { - for (int i = 0; i < m->Groups.size(); i++) { - groups.push_back(m->Groups[i]); + for (int i = 0; i < mGroups.size(); i++) { + groups.push_back(mGroups[i]); //cout << globaldata->Groups[i] << endl; } namesOfGroupCombos.push_back(groups); diff --git a/parsimonycommand.cpp b/parsimonycommand.cpp index 66f8193..3752935 100644 --- a/parsimonycommand.cpp +++ b/parsimonycommand.cpp @@ -126,8 +126,8 @@ ParsimonyCommand::ParsimonyCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -164,10 +164,10 @@ ParsimonyCommand::ParsimonyCommand(string option) { //check for optional parameter and set defaults // ...at some point should added some additional type checking... groups = validParameter.validFile(parameters, "groups", false); - if (groups == "not found") { groups = ""; m->Groups.clear(); } + if (groups == "not found") { groups = ""; m->clearGroups(); } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } @@ -242,7 +242,7 @@ int ParsimonyCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -280,8 +280,11 @@ int ParsimonyCommand::execute() { //set users groups to analyze util = new SharedUtil(); - util->setGroups(m->Groups, tmap->namesOfGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze - util->getCombos(groupComb, m->Groups, numComp); + vector mGroups = m->getGroups(); + vector tGroups = tmap->getNamesOfGroups(); + util->setGroups(mGroups, tGroups, allGroups, numGroups, "parsimony"); //sets the groups the user wants to analyze + util->getCombos(groupComb, mGroups, numComp); + m->setGroups(mGroups); delete util; if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } @@ -297,7 +300,7 @@ int ParsimonyCommand::execute() { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -322,7 +325,7 @@ int ParsimonyCommand::execute() { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -361,7 +364,7 @@ int ParsimonyCommand::execute() { if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -398,7 +401,7 @@ int ParsimonyCommand::execute() { delete reading; delete pars; delete output; delete randT; delete tmap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -410,7 +413,7 @@ int ParsimonyCommand::execute() { delete reading; delete pars; delete output; delete randT; delete tmap; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -467,7 +470,7 @@ int ParsimonyCommand::execute() { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } if (randomtree == "") { outSum.close(); } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -480,7 +483,7 @@ int ParsimonyCommand::execute() { if (randomtree == "") { printUSummaryFile(); } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); delete pars; delete output; delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } @@ -585,6 +588,7 @@ void ParsimonyCommand::getUserInput() { count = 1; numEachGroup.resize(numGroups, 0); + for (int i = 1; i <= numGroups; i++) { m->mothurOut("Please enter the number of sequences in group " + toString(i) + ": "); cin >> num; @@ -592,7 +596,7 @@ void ParsimonyCommand::getUserInput() { //set tmaps seqsPerGroup tmap->seqsPerGroup[toString(i)] = num; - tmap->namesOfGroups.push_back(toString(i)); + tmap->addGroup(toString(i)); //set tmaps namesOfSeqs for (int j = 0; j < num; j++) { diff --git a/pcacommand.cpp b/pcacommand.cpp index 3a92fa0..27e448b 100644 --- a/pcacommand.cpp +++ b/pcacommand.cpp @@ -157,7 +157,7 @@ PCACommand::PCACommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); } @@ -365,7 +365,7 @@ int PCACommand::process(vector& lookupFloat){ string fbase = outputDir + m->getRootName(m->getSimpleName(inputFile)); string outputFileName = fbase + lookupFloat[0]->getLabel(); - output(outputFileName, m->Groups, X, d); + output(outputFileName, m->getGroups(), X, d); if (metric) { diff --git a/phylodiversitycommand.cpp b/phylodiversitycommand.cpp index d273e1d..ca0d02a 100644 --- a/phylodiversitycommand.cpp +++ b/phylodiversitycommand.cpp @@ -137,8 +137,8 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -193,7 +193,7 @@ PhyloDiversityCommand::PhyloDiversityCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } if ((!collect) && (!rarefy) && (!summary)) { m->mothurOut("No outputs selected. You must set either collect, rarefy or summary to true, summary=T by default."); m->mothurOutEndLine(); abort=true; } @@ -258,7 +258,7 @@ int PhyloDiversityCommand::execute(){ if (m->control_pressed) { delete tmap; for (int i = 0; i < trees.size(); i++) { delete trees[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -277,11 +277,14 @@ int PhyloDiversityCommand::execute(){ } SharedUtil* util = new SharedUtil(); - util->setGroups(m->Groups, tmap->namesOfGroups, "phylo.diversity"); //sets the groups the user wants to analyze + vector mGroups = m->getGroups(); + vector tGroups = tmap->getNamesOfGroups(); + util->setGroups(mGroups, tGroups, "phylo.diversity"); //sets the groups the user wants to analyze delete util; //incase the user had some mismatches between the tree and group files we don't want group xxx to be analyzed - for (int i = 0; i < m->Groups.size(); i++) { if (m->Groups[i] == "xxx") { m->Groups.erase(m->Groups.begin()+i); break; } } + for (int i = 0; i < mGroups.size(); i++) { if (mGroups[i] == "xxx") { mGroups.erase(mGroups.begin()+i); break; } } + m->setGroups(mGroups); vector outputNames; @@ -304,7 +307,7 @@ int PhyloDiversityCommand::execute(){ //create a vector containing indexes of leaf nodes, randomize it, select nodes to send to calculator vector randomLeaf; for (int j = 0; j < numLeafNodes; j++) { - if (m->inUsersGroups(trees[i]->tree[j].getGroup(), m->Groups) == true) { //is this a node from the group the user selected. + if (m->inUsersGroups(trees[i]->tree[j].getGroup(), mGroups) == true) { //is this a node from the group the user selected. randomLeaf.push_back(j); } } @@ -319,15 +322,15 @@ int PhyloDiversityCommand::execute(){ //find largest group total int largestGroup = 0; - for (int j = 0; j < m->Groups.size(); j++) { - if (tmap->seqsPerGroup[m->Groups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[m->Groups[j]]; } + for (int j = 0; j < mGroups.size(); j++) { + if (tmap->seqsPerGroup[mGroups[j]] > largestGroup) { largestGroup = tmap->seqsPerGroup[mGroups[j]]; } //initialize diversity - diversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); //numSampled + diversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); //numSampled //groupA 0.0 0.0 //initialize sumDiversity - sumDiversity[m->Groups[j]].resize(tmap->seqsPerGroup[m->Groups[j]]+1, 0.0); + sumDiversity[mGroups[j]].resize(tmap->seqsPerGroup[mGroups[j]]+1, 0.0); } //convert freq percentage to number @@ -341,8 +344,8 @@ int PhyloDiversityCommand::execute(){ if(largestGroup % increment != 0){ numSampledList.insert(largestGroup); } //add other groups ending points - for (int j = 0; j < m->Groups.size(); j++) { - if (numSampledList.count(diversity[m->Groups[j]].size()-1) == 0) { numSampledList.insert(diversity[m->Groups[j]].size()-1); } + for (int j = 0; j < mGroups.size(); j++) { + if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); } } #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) @@ -484,6 +487,7 @@ int PhyloDiversityCommand::createProcesses(vector& procIters, Tree* t, map< int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, map >& sumDiv, int numIters, int increment, vector& randomLeaf, set& numSampledList, ofstream& outCollect, ofstream& outSum, bool doSumCollect){ try { int numLeafNodes = randomLeaf.size(); + vector mGroups = m->getGroups(); for (int l = 0; l < numIters; l++) { random_shuffle(randomLeaf.begin(), randomLeaf.end()); @@ -491,7 +495,7 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma //initialize counts map counts; map< string, set > countedBranch; - for (int j = 0; j < m->Groups.size(); j++) { counts[m->Groups[j]] = 0; countedBranch[m->Groups[j]].insert(-2); } //add dummy index to initialize countedBranch sets + for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; countedBranch[mGroups[j]].insert(-2); } //add dummy index to initialize countedBranch sets for(int k = 0; k < numLeafNodes; k++){ @@ -522,9 +526,9 @@ int PhyloDiversityCommand::driver(Tree* t, map< string, vector >& div, ma if (rarefy) { //add this diversity to the sum - for (int j = 0; j < m->Groups.size(); j++) { - for (int g = 0; g < div[m->Groups[j]].size(); g++) { - sumDiv[m->Groups[j]][g] += div[m->Groups[j]][g]; + for (int j = 0; j < mGroups.size(); j++) { + for (int g = 0; g < div[mGroups[j]].size(); g++) { + sumDiv[mGroups[j]][g] += div[mGroups[j]][g]; } } } @@ -550,15 +554,16 @@ void PhyloDiversityCommand::printSumData(map< string, vector >& div, ofst out << "Groups\tnumSampled\tphyloDiversity" << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - - for (int j = 0; j < m->Groups.size(); j++) { - int numSampled = (div[m->Groups[j]].size()-1); - out << m->Groups[j] << '\t' << numSampled << '\t'; + + vector mGroups = m->getGroups(); + for (int j = 0; j < mGroups.size(); j++) { + int numSampled = (div[mGroups[j]].size()-1); + out << mGroups[j] << '\t' << numSampled << '\t'; float score; - if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } - else { score = div[m->Groups[j]][numSampled] / (float)numIters; } + if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; } + else { score = div[mGroups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << endl; } @@ -577,7 +582,8 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector try { out << "numSampled\t"; - for (int i = 0; i < m->Groups.size(); i++) { out << m->Groups[i] << '\t'; } + vector mGroups = m->getGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << mGroups[i] << '\t'; } out << endl; out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); @@ -586,12 +592,12 @@ void PhyloDiversityCommand::printData(set& num, map< string, vector int numSampled = *it; out << numSampled << '\t'; - - for (int j = 0; j < m->Groups.size(); j++) { - if (numSampled < div[m->Groups[j]].size()) { + + for (int j = 0; j < mGroups.size(); j++) { + if (numSampled < div[mGroups[j]].size()) { float score; - if (scale) { score = (div[m->Groups[j]][numSampled] / (float)numIters) / (float)numSampled; } - else { score = div[m->Groups[j]][numSampled] / (float)numIters; } + if (scale) { score = (div[mGroups[j]][numSampled] / (float)numIters) / (float)numSampled; } + else { score = div[mGroups[j]][numSampled] / (float)numIters; } out << setprecision(4) << score << '\t'; }else { out << "NA" << '\t'; } diff --git a/phylosummary.cpp b/phylosummary.cpp index 873e5d1..58274b3 100644 --- a/phylosummary.cpp +++ b/phylosummary.cpp @@ -168,8 +168,9 @@ int PhyloSummary::addSeqToTree(string seqName, string seqTaxonomy){ //initialize groupcounts if (groupmap != NULL) { - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + tree[index].groupCount[mGroups[j]] = 0; } //find out the sequences group @@ -234,8 +235,9 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ if (groupmap != NULL) { map containsGroup; - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - containsGroup[groupmap->namesOfGroups[j]] = false; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + containsGroup[mGroups[j]] = false; } for (int k = 0; k < names.size(); k++) { @@ -273,9 +275,10 @@ int PhyloSummary::addSeqToTree(string seqTaxonomy, vector names){ //initialize groupcounts if (groupmap != NULL) { map containsGroup; - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[index].groupCount[groupmap->namesOfGroups[j]] = 0; - containsGroup[groupmap->namesOfGroups[j]] = false; + vector mGroups = groupmap->getNamesOfGroups(); + for (int j = 0; j < mGroups.size(); j++) { + tree[index].groupCount[mGroups[j]] = 0; + containsGroup[mGroups[j]] = false; } @@ -349,9 +352,9 @@ void PhyloSummary::print(ofstream& out){ if (groupmap != NULL) { //so the labels match the counts below, since the map sorts them automatically... //sort(groupmap->namesOfGroups.begin(), groupmap->namesOfGroups.end()); - - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { - out << groupmap->namesOfGroups[i] << '\t'; + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { + out << mGroups[i] << '\t'; } } @@ -372,7 +375,8 @@ void PhyloSummary::print(ofstream& out){ //for (itGroup = tree[0].groupCount.begin(); itGroup != tree[0].groupCount.end(); itGroup++) { // out << itGroup->second << '\t'; //} - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[0].groupCount[groupmap->namesOfGroups[i]] << '\t'; } + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << tree[0].groupCount[mGroups[i]] << '\t'; } } out << endl; @@ -409,7 +413,8 @@ void PhyloSummary::print(int i, ofstream& out){ //for (itGroup = tree[it->second].groupCount.begin(); itGroup != tree[it->second].groupCount.end(); itGroup++) { // out << itGroup->second << '\t'; //} - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { out << tree[it->second].groupCount[groupmap->namesOfGroups[i]] << '\t'; } + vector mGroups = groupmap->getNamesOfGroups(); + for (int i = 0; i < mGroups.size(); i++) { out << tree[it->second].groupCount[mGroups[i]] << '\t'; } } out << endl; @@ -453,8 +458,8 @@ void PhyloSummary::readTreeStruct(ifstream& in){ //initialize groupcounts if (groupmap != NULL) { - for (int j = 0; j < groupmap->namesOfGroups.size(); j++) { - tree[i].groupCount[groupmap->namesOfGroups[j]] = 0; + for (int j = 0; j < (groupmap->getNamesOfGroups()).size(); j++) { + tree[i].groupCount[(groupmap->getNamesOfGroups())[j]] = 0; } } diff --git a/rarefactcommand.cpp b/rarefactcommand.cpp index 8221114..8d883aa 100644 --- a/rarefactcommand.cpp +++ b/rarefactcommand.cpp @@ -286,7 +286,7 @@ int RareFactCommand::execute(){ string fileNameRoot = outputDir + m->getRootName(m->getSimpleName(inputFileNames[p])); - if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->Groups.clear(); return 0; } + if (m->control_pressed) { outputTypes.clear(); for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } m->clearGroups(); return 0; } if (inputFileNames.size() > 1) { m->mothurOutEndLine(); m->mothurOut("Processing group " + groups[p]); m->mothurOutEndLine(); m->mothurOutEndLine(); @@ -499,7 +499,7 @@ vector RareFactCommand::parseSharedFile(string filename) { } delete input; - m->Groups.clear(); + m->clearGroups(); return filenames; } diff --git a/rarefactsharedcommand.cpp b/rarefactsharedcommand.cpp index 02f434a..b0d6650 100644 --- a/rarefactsharedcommand.cpp +++ b/rarefactsharedcommand.cpp @@ -158,7 +158,7 @@ RareFactSharedCommand::RareFactSharedCommand(string option) { else { m->splitAtDash(groups, Groups); } - m->Groups = Groups; + m->setGroups(Groups); string temp; temp = validParameter.validFile(parameters, "freq", false); if (temp == "not found") { temp = "100"; } @@ -211,7 +211,7 @@ int RareFactSharedCommand::execute(){ string lastLabel = lookup[0]->getLabel(); if (m->control_pressed) { - m->Groups.clear(); + m->clearGroups(); delete input; for(int i=0;imothurRemove(outputNames[i]); } @@ -233,7 +233,7 @@ int RareFactSharedCommand::execute(){ //as long as you are not at the end of the file or done wih the lines you want while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) { if (m->control_pressed) { - m->Groups.clear(); + m->clearGroups(); delete input; for(int i=0;imothurRemove(outputNames[i]); } @@ -278,7 +278,7 @@ int RareFactSharedCommand::execute(){ } if (m->control_pressed) { - m->Groups.clear(); + m->clearGroups(); delete input; for(int i=0;imothurRemove(outputNames[i]); } @@ -299,7 +299,7 @@ int RareFactSharedCommand::execute(){ } if (m->control_pressed) { - m->Groups.clear(); + m->clearGroups(); delete input; for(int i=0;imothurRemove(outputNames[i]); } @@ -319,7 +319,7 @@ int RareFactSharedCommand::execute(){ } for(int i=0;iGroups.clear(); + m->clearGroups(); delete input; if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } diff --git a/readtree.cpp b/readtree.cpp index be7dc79..74b4268 100644 --- a/readtree.cpp +++ b/readtree.cpp @@ -425,7 +425,7 @@ int ReadNewickTree::readNewickInt(istream& f, int& n, Tree* T, TreeMap* tmap) { map::iterator it; it = tmap->seqsPerGroup.find("xxx"); if (it == tmap->seqsPerGroup.end()) { //its a new group - tmap->namesOfGroups.push_back("xxx"); + tmap->addGroup("xxx"); tmap->seqsPerGroup["xxx"] = 1; }else { tmap->seqsPerGroup["xxx"]++; diff --git a/removegroupscommand.cpp b/removegroupscommand.cpp index 252a925..81722dd 100644 --- a/removegroupscommand.cpp +++ b/removegroupscommand.cpp @@ -204,7 +204,7 @@ RemoveGroupsCommand::RemoveGroupsCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } sharedfile = validParameter.validFile(parameters, "shared", true); @@ -273,7 +273,8 @@ int RemoveGroupsCommand::execute(){ //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector namesGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, namesGroups); delete util; //fill names with names of sequences that are from the groups we want to remove @@ -403,23 +404,23 @@ int RemoveGroupsCommand::readShared(){ vector lookup = tempInput->getSharedRAbundVectors(); //save m->Groups - vector allGroupsNames = m->namesOfGroups; - vector mothurOutGroups = m->Groups; + vector allGroupsNames = m->getAllGroups(); + vector mothurOutGroups = m->getGroups(); vector groupsToKeep; for (int i = 0; i < allGroupsNames.size(); i++) { - if (!m->inUsersGroups(allGroupsNames[i], m->Groups)) { + if (!m->inUsersGroups(allGroupsNames[i], m->getGroups())) { groupsToKeep.push_back(allGroupsNames[i]); } } - if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove."); m->mothurOutEndLine(); m->Groups = mothurOutGroups; delete tempInput; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } + if (allGroupsNames.size() == groupsToKeep.size()) { m->mothurOut("Your file does not contain any groups you wish to remove."); m->mothurOutEndLine(); m->setGroups(mothurOutGroups); delete tempInput; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } return 0; } //reset read for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } delete tempInput; - m->Groups = groupsToKeep; - m->namesOfGroups.clear(); + m->setGroups(groupsToKeep); + m->clearAllGroups(); m->names.clear(); m->saveNextLabel = ""; m->printedHeaders = false; @@ -458,7 +459,7 @@ int RemoveGroupsCommand::readShared(){ } - m->Groups = mothurOutGroups; + m->setGroups(mothurOutGroups); if (wroteSomething == false) { m->mothurOut("Your file contains only the groups you wish to remove."); m->mothurOutEndLine(); } @@ -745,7 +746,7 @@ void RemoveGroupsCommand::readAccnos(){ } in.close(); - m->Groups = Groups; + m->setGroups(Groups); } catch(exception& e) { diff --git a/removeotuscommand.cpp b/removeotuscommand.cpp index 3390c20..25ed99e 100644 --- a/removeotuscommand.cpp +++ b/removeotuscommand.cpp @@ -187,7 +187,8 @@ int RemoveOtusCommand::execute(){ //make sure groups are valid //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector allGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, allGroups); delete util; if (m->control_pressed) { delete groupMap; return 0; } diff --git a/removerarecommand.cpp b/removerarecommand.cpp index f82c4b0..351ace6 100644 --- a/removerarecommand.cpp +++ b/removerarecommand.cpp @@ -353,7 +353,8 @@ int RemoveRareCommand::processList(){ if (groupfile != "") { groupMap = new GroupMap(groupfile); groupMap->readMap(); SharedUtil util; - util.setGroups(Groups, groupMap->namesOfGroups); + vector namesGroups = groupMap->getNamesOfGroups(); + util.setGroups(Groups, namesGroups); m->openOutputFile(outputGroupFileName, outGroup); } @@ -624,7 +625,7 @@ int RemoveRareCommand::processRabund(){ //********************************************************************************************************************** int RemoveRareCommand::processShared(){ try { - m->Groups = Groups; + m->setGroups(Groups); string thisOutputDir = outputDir; if (outputDir == "") { thisOutputDir += m->hasPath(sharedfile); } diff --git a/sharedcommand.cpp b/sharedcommand.cpp index 2928214..de48158 100644 --- a/sharedcommand.cpp +++ b/sharedcommand.cpp @@ -18,7 +18,7 @@ vector SharedCommand::setParameters(){ try { CommandParameter plist("list", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(plist); CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pgroup); - CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup); + //CommandParameter pordergroup("ordergroup", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pordergroup); CommandParameter plabel("label", "String", "", "", "", "", "",false,false); parameters.push_back(plabel); CommandParameter pgroups("groups", "String", "", "", "", "", "",false,false); parameters.push_back(pgroups); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); @@ -41,7 +41,7 @@ string SharedCommand::getHelpString(){ helpString += "The make.shared command parameters are list, group, ordergroup, groups and label. list and group are required unless a current file is available.\n"; helpString += "The groups parameter allows you to indicate which groups you want to include, group names should be separated by dashes. ex. groups=A-B-C. Default is all groups in your groupfile.\n"; helpString += "The label parameter allows you to indicate which labels you want to include, label names should be separated by dashes. Default is all labels in your list file.\n"; - helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n"; + //helpString += "The ordergroup parameter allows you to indicate the order of the groups in the sharedfile, by default the groups are listed alphabetically.\n"; return helpString; } catch(exception& e) { @@ -147,7 +147,8 @@ SharedCommand::SharedCommand(string option) { int error = groupMap->readMap(); if (error == 1) { abort = true; } - m->namesOfGroups = groupMap->namesOfGroups; + vector allGroups = groupMap->getNamesOfGroups(); + m->setAllGroups(allGroups); } else { m->mothurOut("You have no current group file and the group parameter is required."); m->mothurOutEndLine(); abort = true; } }else { @@ -155,7 +156,8 @@ SharedCommand::SharedCommand(string option) { int error = groupMap->readMap(); if (error == 1) { abort = true; } - m->namesOfGroups = groupMap->namesOfGroups; + vector allGroups = groupMap->getNamesOfGroups(); + m->setAllGroups(allGroups); m->setGroupFile(groupfile); } @@ -163,7 +165,7 @@ SharedCommand::SharedCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } //check for optional parameter and set defaults @@ -203,7 +205,7 @@ int SharedCommand::execute(){ //if hte user has not specified any groups then use them all if (Groups.size() == 0) { - Groups = groupMap->namesOfGroups; m->Groups = Groups; + Groups = groupMap->getNamesOfGroups(); m->setGroups(Groups); }else { pickedGroups = true; } //fill filehandles with neccessary ofstreams @@ -243,8 +245,11 @@ int SharedCommand::execute(){ for (int i=0; imothurRemove((fileroot + Groups[i] + ".rabund")); } return 0; } - - if ((m->Groups.size() == 0) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error + + //sanity check + int error = ListGroupSameSeqs(); + + if ((!pickedGroups) && (SharedList->getNumSeqs() != groupMap->getNumSeqs())) { //if the user has not specified any groups and their files don't match exit with error m->mothurOut("Your group file contains " + toString(groupMap->getNumSeqs()) + " sequences and list file contains " + toString(SharedList->getNumSeqs()) + " sequences. Please correct."); m->mothurOutEndLine(); out.close(); @@ -262,12 +267,14 @@ int SharedCommand::execute(){ return 0; } + if (error == 1) { m->control_pressed = true; } + //if user has specified groups make new groupfile for them if (pickedGroups) { //make new group file string groups = ""; - if (m->Groups.size() < 4) { - for (int i = 0; i < m->Groups.size(); i++) { - groups += m->Groups[i] + "."; + if (m->getNumGroups() < 4) { + for (int i = 0; i < m->getNumGroups(); i++) { + groups += (m->getGroups())[i] + "."; } }else { groups = "merge"; } @@ -281,7 +288,7 @@ int SharedCommand::execute(){ string groupName; for (int i = 0; i < names.size(); i++) { groupName = groupMap->getGroup(names[i]); - if (isValidGroup(groupName, m->Groups)) { + if (isValidGroup(groupName, m->getGroups())) { outGroups << names[i] << '\t' << groupName << endl; } } @@ -452,20 +459,22 @@ void SharedCommand::printSharedData(vector thislookup) { if (order.size() == 0) { //user has not specified an order so do aplabetically sort(thislookup.begin(), thislookup.end(), compareSharedRabunds); - m->Groups.clear(); + m->clearGroups(); + vector Groups; //initialize bin values for (int i = 0; i < thislookup.size(); i++) { out << thislookup[i]->getLabel() << '\t' << thislookup[i]->getGroup() << '\t'; thislookup[i]->print(out); - m->Groups.push_back(thislookup[i]->getGroup()); + Groups.push_back(thislookup[i]->getGroup()); RAbundVector rav = thislookup[i]->getRAbundVector(); m->openOutputFileAppend(fileroot + thislookup[i]->getGroup() + ".rabund", *(filehandles[thislookup[i]->getGroup()])); rav.print(*(filehandles[thislookup[i]->getGroup()])); (*(filehandles[thislookup[i]->getGroup()])).close(); } + m->setGroups(Groups); }else{ //create a map from groupName to each sharedrabund map myMap; @@ -475,7 +484,8 @@ void SharedCommand::printSharedData(vector thislookup) { myMap[thislookup[i]->getGroup()] = thislookup[i]; } - m->Groups.clear(); + m->clearGroups(); + vector Groups; //loop through ordered list and print the rabund for (int i = 0; i < order.size(); i++) { @@ -485,7 +495,7 @@ void SharedCommand::printSharedData(vector thislookup) { out << (myIt->second)->getLabel() << '\t' << (myIt->second)->getGroup() << '\t'; (myIt->second)->print(out); - m->Groups.push_back((myIt->second)->getGroup()); + Groups.push_back((myIt->second)->getGroup()); RAbundVector rav = (myIt->second)->getRAbundVector(); m->openOutputFileAppend(fileroot + (myIt->second)->getGroup() + ".rabund", *(filehandles[(myIt->second)->getGroup()])); @@ -495,6 +505,8 @@ void SharedCommand::printSharedData(vector thislookup) { m->mothurOut("Can't find shared info for " + order[i] + ", skipping."); m->mothurOutEndLine(); } } + + m->setGroups(Groups); } @@ -655,7 +667,49 @@ int SharedCommand::createMisMatchFile() { exit(1); } } - +//********************************************************************************************************************** +int SharedCommand::ListGroupSameSeqs() { + try { + + int error = 0; + + vector groupMapsSeqs = groupMap->getNamesSeqs(); + + set groupNamesSeqs; + for(int i = 0; i < groupMapsSeqs.size(); i++) { + groupNamesSeqs.insert(groupMapsSeqs[i]); + } + + + //go through list and if group returns "not found" output it + for (int i = 0; i < SharedList->getNumBins(); i++) { + if (m->control_pressed) { return 0; } + + string names = SharedList->get(i); + + vector listNames; + m->splitAtComma(names, listNames); + + for (int j = 0; j < listNames.size(); j++) { + int num = groupNamesSeqs.count(listNames[j]); + + if (num == 0) { error = 1; m->mothurOut("[ERROR]: " + listNames[j] + " is in your listfile and not in your groupfile. Please correct."); m->mothurOutEndLine(); } + else { groupNamesSeqs.erase(listNames[j]); } + } + } + + for (set::iterator itGroupSet = groupNamesSeqs.begin(); itGroupSet != groupNamesSeqs.end(); itGroupSet++) { + error = 1; + m->mothurOut("[ERROR]: " + (*itGroupSet) + " is in your groupfile and not your listfile. Please correct."); m->mothurOutEndLine(); + } + + return error; + } + catch(exception& e) { + m->errorOut(e, "SharedCommand", "ListGroupSameSeqs"); + exit(1); + } +} //********************************************************************************************************************** SharedCommand::~SharedCommand(){ diff --git a/sharedcommand.h b/sharedcommand.h index 51a2528..861632c 100644 --- a/sharedcommand.h +++ b/sharedcommand.h @@ -43,6 +43,7 @@ private: int readOrderFile(); bool isValidGroup(string, vector); int eliminateZeroOTUS(vector&); + int ListGroupSameSeqs(); SharedListVector* SharedList; InputData* input; diff --git a/sharedjackknife.cpp b/sharedjackknife.cpp index f80a8d4..0998db8 100644 --- a/sharedjackknife.cpp +++ b/sharedjackknife.cpp @@ -88,7 +88,7 @@ EstOutput SharedJackknife::getValues(vector vectorShared){ SharedRAbundVector* shared1 = vectorShared[0]; SharedRAbundVector* shared2 = vectorShared[1]; if(numGroups == -1) { - numGroups = m->Groups.size(); + numGroups = m->getNumGroups(); } if(callCount == numGroups*(numGroups-1)/2) { diff --git a/sharedlistvector.cpp b/sharedlistvector.cpp index 3852c0c..6dfcb97 100644 --- a/sharedlistvector.cpp +++ b/sharedlistvector.cpp @@ -266,17 +266,20 @@ vector SharedListVector::getSharedRAbundVector() { vector lookup; //contains just the groups the user selected map finder; //contains all groups in groupmap string group, names, name; - - util->setGroups(m->Groups, groupmap->namesOfGroups); + + vector Groups = m->getGroups(); + vector allGroups = groupmap->getNamesOfGroups(); + util->setGroups(Groups, allGroups); + m->setGroups(Groups); delete util; - for (int i = 0; i < groupmap->namesOfGroups.size(); i++) { + for (int i = 0; i < allGroups.size(); i++) { SharedRAbundVector* temp = new SharedRAbundVector(data.size()); - finder[groupmap->namesOfGroups[i]] = temp; - finder[groupmap->namesOfGroups[i]]->setLabel(label); - finder[groupmap->namesOfGroups[i]]->setGroup(groupmap->namesOfGroups[i]); - if (m->inUsersGroups(groupmap->namesOfGroups[i], m->Groups)) { //if this group is in user groups - lookup.push_back(finder[groupmap->namesOfGroups[i]]); + finder[allGroups[i]] = temp; + finder[allGroups[i]]->setLabel(label); + finder[allGroups[i]]->setGroup(allGroups[i]); + if (m->inUsersGroups(allGroups[i], m->getGroups())) { //if this group is in user groups + lookup.push_back(finder[allGroups[i]]); } } diff --git a/sharedordervector.cpp b/sharedordervector.cpp index 37cd283..ca26362 100644 --- a/sharedordervector.cpp +++ b/sharedordervector.cpp @@ -41,9 +41,9 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { //reads in a f >> label >> groupN >> num; holdLabel = label; - + vector allGroups; //save group in groupmap - groupmap->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); groupmap->groupIndex[groupN] = 0; @@ -67,7 +67,7 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { //reads in a //save group in groupmap - groupmap->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); groupmap->groupIndex[groupN] = count; @@ -88,8 +88,9 @@ SharedOrderVector::SharedOrderVector(ifstream& f) : DataVector() { //reads in a //put file pointer back since you are now at a new distance label for (int i = 0; i < nextLabel.length(); i++) { f.unget(); } - - m->namesOfGroups = groupmap->namesOfGroups; + + groupmap->setNamesOfGroups(allGroups); + m->setAllGroups(allGroups); updateStats(); @@ -285,8 +286,12 @@ vector SharedOrderVector::getSharedRAbundVector() { util = new SharedUtil(); vector lookup; - util->setGroups(m->Groups, m->namesOfGroups); - util->getSharedVectors(m->Groups, lookup, this); + vector Groups = m->getGroups(); + vector allGroups = m->getAllGroups(); + util->setGroups(Groups, allGroups); + util->getSharedVectors(Groups, lookup, this); + m->setGroups(Groups); + m->setAllGroups(allGroups); return lookup; } diff --git a/sharedrabundfloatvector.cpp b/sharedrabundfloatvector.cpp index a5e17c2..bda49bf 100644 --- a/sharedrabundfloatvector.cpp +++ b/sharedrabundfloatvector.cpp @@ -33,7 +33,8 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(int n) : DataVector(), maxRank( SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), maxRank(0.0), numBins(0), numSeqs(0.0) { try { - m->namesOfGroups.clear(); + m->clearAllGroups(); + vector allGroups; int num, count; float inputData; @@ -88,7 +89,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma lookup[0]->setLabel(label); lookup[0]->setGroup(groupN); - m->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); //fill vector. data = first sharedrabund in file for(int i=0;i> groupN >> num; count++; - m->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); //add new vector to lookup temp = new SharedRAbundFloatVector(); @@ -129,6 +130,7 @@ SharedRAbundFloatVector::SharedRAbundFloatVector(ifstream& f) : DataVector(), ma } m->saveNextLabel = nextLabel; + m->setAllGroups(allGroups); } catch(exception& e) { @@ -339,12 +341,15 @@ vector SharedRAbundFloatVector::getSharedRAbundFloatVe SharedUtil* util; util = new SharedUtil(); - util->setGroups(m->Groups, m->namesOfGroups); + vector Groups = m->getGroups(); + vector allGroups = m->getAllGroups(); + util->setGroups(Groups, allGroups); + m->setGroups(Groups); bool remove = false; for (int i = 0; i < lookup.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. - if (util->isValidGroup(lookup[i]->getGroup(), m->Groups) == false) { + if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { delete lookup[i]; lookup[i] = NULL; lookup.erase(lookup.begin()+i); i--; diff --git a/sharedrabundvector.cpp b/sharedrabundvector.cpp index 7952859..56b7d09 100644 --- a/sharedrabundvector.cpp +++ b/sharedrabundvector.cpp @@ -59,8 +59,9 @@ SharedRAbundVector::SharedRAbundVector(string id, vector rav) : Data //reads a shared file SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), numBins(0), numSeqs(0) { try { - m->namesOfGroups.clear(); - + m->clearAllGroups(); + vector allGroups; + int num, inputData, count; count = 0; string holdLabel, nextLabel, groupN; @@ -112,7 +113,7 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), lookup[0]->setLabel(label); lookup[0]->setGroup(groupN); - m->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); //fill vector. data = first sharedrabund in file for(int i=0;i> groupN >> num; count++; - m->namesOfGroups.push_back(groupN); + allGroups.push_back(groupN); //add new vector to lookup temp = new SharedRAbundVector(); @@ -152,7 +153,8 @@ SharedRAbundVector::SharedRAbundVector(ifstream& f) : DataVector(), maxRank(0), if (f.eof() != true) { f >> nextLabel; } } - m->saveNextLabel = nextLabel; + m->saveNextLabel = nextLabel; + m->setAllGroups(allGroups); } catch(exception& e) { @@ -418,12 +420,14 @@ vector SharedRAbundVector::getSharedRAbundVectors(){ SharedUtil* util; util = new SharedUtil(); - util->setGroups(m->Groups, m->namesOfGroups); + vector Groups = m->getGroups(); + vector allGroups = m->getAllGroups(); + util->setGroups(Groups, allGroups); bool remove = false; for (int i = 0; i < lookup.size(); i++) { //if this sharedrabund is not from a group the user wants then delete it. - if (util->isValidGroup(lookup[i]->getGroup(), m->Groups) == false) { + if (util->isValidGroup(lookup[i]->getGroup(), m->getGroups()) == false) { remove = true; delete lookup[i]; lookup[i] = NULL; lookup.erase(lookup.begin()+i); diff --git a/sharedutilities.cpp b/sharedutilities.cpp index f43bb81..151b254 100644 --- a/sharedutilities.cpp +++ b/sharedutilities.cpp @@ -102,6 +102,10 @@ void SharedUtil::getSharedVectorswithReplacement(vector Groups, vector& userGroups, vector& allGroups) { try { + + sort(userGroups.begin(), userGroups.end()); + sort(allGroups.begin(), allGroups.end()); + if (userGroups.size() != 0) { if (userGroups[0] != "all") { //check that groups are valid @@ -144,6 +148,10 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups //need to have mode because different commands require different number of valid groups void SharedUtil::setGroups(vector& userGroups, vector& allGroups, string mode) { try { + + sort(userGroups.begin(), userGroups.end()); + sort(allGroups.begin(), allGroups.end()); + if (userGroups.size() != 0) { if (userGroups[0] != "all") { //check that groups are valid @@ -190,6 +198,9 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups //for parsimony and unifrac commands you set pairwise groups as well as an allgroups in calc void SharedUtil::setGroups(vector& userGroups, vector& allGroups, string& label, int& numGroups, string mode){ //globaldata->Groups, your tree or group map, allgroups, mode try { + sort(userGroups.begin(), userGroups.end()); + sort(allGroups.begin(), allGroups.end()); + numGroups = 0; label = ""; @@ -271,6 +282,7 @@ void SharedUtil::setGroups(vector& userGroups, vector& allGroups /**************************************************************************************/ void SharedUtil::getCombos(vector& groupComb, vector userGroups, int& numComp) { //groupcomb, globaldata->Groups, numcomb try { + sort(userGroups.begin(), userGroups.end()); //calculate number of comparisons i.e. with groups A,B,C = AB, AC, BC = 3; numComp = 0; for (int i=0; i< userGroups.size(); i++) { diff --git a/splitabundcommand.cpp b/splitabundcommand.cpp index 29ffdb4..140f379 100644 --- a/splitabundcommand.cpp +++ b/splitabundcommand.cpp @@ -181,7 +181,7 @@ SplitAbundCommand::SplitAbundCommand(string option) { groups = validParameter.validFile(parameters, "groups", false); if (groups == "not found") { groups = ""; } else if (groups == "all") { - if (groupfile != "") { Groups = groupMap->namesOfGroups; } + if (groupfile != "") { Groups = groupMap->getNamesOfGroups(); } else { m->mothurOut("You cannot select groups without a valid groupfile, I will disregard your groups selection. "); m->mothurOutEndLine(); groups = ""; } }else { m->splitAtDash(groups, Groups); diff --git a/splitgroupscommand.cpp b/splitgroupscommand.cpp index 00d9a9d..530df68 100644 --- a/splitgroupscommand.cpp +++ b/splitgroupscommand.cpp @@ -167,7 +167,8 @@ int SplitGroupCommand::execute(){ groupMap = new GroupMap(groupfile); groupMap->readMap(); - SharedUtil util; util.setGroups(Groups, groupMap->namesOfGroups); + vector namesGroups = groupMap->getNamesOfGroups(); + SharedUtil util; util.setGroups(Groups, namesGroups); if (namefile != "") { readNames(); } splitFasta(); diff --git a/subsamplecommand.cpp b/subsamplecommand.cpp index e22bfde..651ee02 100644 --- a/subsamplecommand.cpp +++ b/subsamplecommand.cpp @@ -229,7 +229,7 @@ SubSampleCommand::SubSampleCommand(string option) { else { pickedGroups = true; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp = validParameter.validFile(parameters, "size", false); if (temp == "not found"){ temp = "0"; } @@ -349,7 +349,8 @@ int SubSampleCommand::getSubSampleFasta() { //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector namesGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, namesGroups); delete util; //file mismatch quit @@ -672,19 +673,20 @@ int SubSampleCommand::getSubSampleShared() { if (thisSize < size) { size = thisSize; } } }else { - m->Groups.clear(); + m->clearGroups(); + Groups.clear(); vector temp; for (int i = 0; i < lookup.size(); i++) { if (lookup[i]->getNumSeqs() < size) { m->mothurOut(lookup[i]->getGroup() + " contains " + toString(lookup[i]->getNumSeqs()) + ". Eliminating."); m->mothurOutEndLine(); delete lookup[i]; }else { - m->Groups.push_back(lookup[i]->getGroup()); + Groups.push_back(lookup[i]->getGroup()); temp.push_back(lookup[i]); } } lookup = temp; - Groups = m->Groups; + m->setGroups(Groups); } if (lookup.size() == 0) { m->mothurOut("The size you selected is too large, skipping shared file."); m->mothurOutEndLine(); delete input; return 0; } @@ -878,7 +880,8 @@ int SubSampleCommand::getSubSampleList() { //takes care of user setting groupNames that are invalid or setting groups=all SharedUtil* util = new SharedUtil(); - util->setGroups(Groups, groupMap->namesOfGroups); + vector namesGroups = groupMap->getNamesOfGroups(); + util->setGroups(Groups, namesGroups); delete util; //create outputfiles diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index 77db937..c8d112f 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -194,7 +194,7 @@ SummarySharedCommand::SummarySharedCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } string temp = validParameter.validFile(parameters, "all", false); if (temp == "not found") { temp = "false"; } @@ -380,7 +380,7 @@ int SummarySharedCommand::execute(){ delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } /******************************************************/ @@ -388,7 +388,7 @@ int SummarySharedCommand::execute(){ /******************************************************/ //comparison breakup to be used by different processes later - numGroups = m->Groups.size(); + numGroups = m->getNumGroups(); lines.resize(processors); for (int i = 0; i < processors; i++) { lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups); @@ -408,7 +408,7 @@ int SummarySharedCommand::execute(){ delete input; for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -450,7 +450,7 @@ int SummarySharedCommand::execute(){ m->mothurRemove(outputFileName); delete input; for(int i=0;iGroups.clear(); + m->clearGroups(); return 0; } @@ -479,7 +479,7 @@ int SummarySharedCommand::execute(){ //reset groups parameter - m->Groups.clear(); + m->clearGroups(); for(int i=0;inamesOfGroups.size(); i++) { - groupNodeInfo[tmap->namesOfGroups[i]].resize(0); + for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); } //initialize tree with correct number of nodes, name and group info. @@ -632,8 +632,8 @@ void Tree::randomLabels(vector g) { try { //initialize groupNodeInfo - for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - groupNodeInfo[tmap->namesOfGroups[i]].resize(0); + for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { + groupNodeInfo[(tmap->getNamesOfGroups())[i]].resize(0); } for(int i = 0; i < numLeaves; i++){ diff --git a/treegroupscommand.cpp b/treegroupscommand.cpp index f1be7cb..3a8cd26 100644 --- a/treegroupscommand.cpp +++ b/treegroupscommand.cpp @@ -240,7 +240,7 @@ TreeGroupCommand::TreeGroupCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } calc = validParameter.validFile(parameters, "calc", false); @@ -396,13 +396,13 @@ int TreeGroupCommand::execute(){ //create treemap class from groupmap for tree class to use tmap = new TreeMap(); - tmap->makeSim(m->namesOfGroups); + tmap->makeSim(m->getAllGroups()); //clear globaldatas old tree names if any m->Treenames.clear(); //fills globaldatas tree names - m->Treenames = m->Groups; + m->Treenames = m->getGroups(); if (m->control_pressed) { return 0; } @@ -438,13 +438,14 @@ int TreeGroupCommand::execute(){ tmap->makeSim(list); - m->Groups = tmap->namesOfGroups; + vector namesGroups = tmap->getNamesOfGroups(); + m->setGroups(namesGroups); //clear globaldatas old tree names if any m->Treenames.clear(); //fills globaldatas tree names - m->Treenames = m->Groups; + m->Treenames = m->getGroups(); //used in tree constructor m->runParse = false; @@ -468,7 +469,7 @@ int TreeGroupCommand::execute(){ } //reset groups parameter - m->Groups.clear(); + m->clearGroups(); //set tree file as new current treefile string current = ""; diff --git a/treemap.h b/treemap.h index 244348e..7ed8d04 100644 --- a/treemap.h +++ b/treemap.h @@ -37,7 +37,11 @@ public: void removeSeq(string); //removes a sequence, this is to accomadate trees that do not contain all the seqs in your groupfile string getGroup(string); void addSeq(string, string); - vector namesOfGroups; + void addGroup(string s) { setNamesOfGroups(s); } + vector getNamesOfGroups() { + sort(namesOfGroups.begin(), namesOfGroups.end()); + return namesOfGroups; + } vector namesOfSeqs; map seqsPerGroup; //groupname, number of seqs in that group. map treemap; //sequence name and @@ -46,6 +50,7 @@ public: void makeSim(ListVector*); //takes listvector info and fills treemap for use by tree.shared command. private: + vector namesOfGroups; ifstream fileHandle; string groupFileName; int numGroups; diff --git a/unifracunweightedcommand.cpp b/unifracunweightedcommand.cpp index 1c2db53..cc175e1 100644 --- a/unifracunweightedcommand.cpp +++ b/unifracunweightedcommand.cpp @@ -134,8 +134,8 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -167,7 +167,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } @@ -196,7 +196,7 @@ UnifracUnweightedCommand::UnifracUnweightedCommand(string option) { if ((phylip) && (Groups.size() == 0)) { groups = "all"; m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } } @@ -259,7 +259,7 @@ int UnifracUnweightedCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -282,8 +282,10 @@ int UnifracUnweightedCommand::execute() { m->openOutputFile(sumFile, outSum); util = new SharedUtil(); - util->setGroups(m->Groups, tmap->namesOfGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze - util->getCombos(groupComb, m->Groups, numComp); + vector Groups = m->getGroups(); + vector namesGroups = tmap->getNamesOfGroups(); + util->setGroups(Groups, namesGroups, allGroups, numGroups, "unweighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, Groups, numComp); delete util; if (numGroups == 1) { numComp++; groupComb.push_back(allGroups); } @@ -403,7 +405,7 @@ int UnifracUnweightedCommand::execute() { outSum.close(); - m->Groups.clear(); + m->clearGroups(); delete tmap; delete unweighted; for (int i = 0; i < T.size(); i++) { delete T[i]; } @@ -516,18 +518,18 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { if ((outputForm == "lt") || (outputForm == "square")) { //output numSeqs - out << m->Groups.size() << endl; + out << m->getNumGroups() << endl; } //make matrix with scores in it - vector< vector > dists; dists.resize(m->Groups.size()); - for (int i = 0; i < m->Groups.size(); i++) { - dists[i].resize(m->Groups.size(), 0.0); + vector< vector > dists; dists.resize(m->getNumGroups()); + for (int i = 0; i < m->getNumGroups(); i++) { + dists[i].resize(m->getNumGroups(), 0.0); } //flip it so you can print it int count = 0; - for (int r=0; rGroups.size(); r++) { + for (int r=0; rgetNumGroups(); r++) { for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count][0]; dists[l][r] = utreeScores[count][0]; @@ -536,9 +538,9 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { } //output to file - for (int r=0; rGroups.size(); r++) { + for (int r=0; rgetNumGroups(); r++) { //output name - string name = m->Groups[r]; + string name = (m->getGroups())[r]; if (name.length() < 10) { //pad with spaces to make compatible while (name.length() < 10) { name += " "; } } @@ -553,12 +555,12 @@ void UnifracUnweightedCommand::createPhylipFile(int i) { out << name << '\t'; //output distances - for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; } + for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; } out << endl; }else{ //output distances for (int l = 0; l < r; l++) { - string otherName = m->Groups[l]; + string otherName = (m->getGroups())[l]; if (otherName.length() < 10) { //pad with spaces to make compatible while (otherName.length() < 10) { otherName += " "; } } diff --git a/unifracweightedcommand.cpp b/unifracweightedcommand.cpp index 994f09b..a0d710e 100644 --- a/unifracweightedcommand.cpp +++ b/unifracweightedcommand.cpp @@ -134,8 +134,8 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { } m->runParse = true; - m->Groups.clear(); - m->namesOfGroups.clear(); + m->clearGroups(); + m->clearAllGroups(); m->Treenames.clear(); m->names.clear(); @@ -168,7 +168,7 @@ UnifracWeightedCommand::UnifracWeightedCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } itersString = validParameter.validFile(parameters, "iters", false); if (itersString == "not found") { itersString = "1000"; } @@ -253,7 +253,7 @@ int UnifracWeightedCommand::execute() { if (m->control_pressed) { delete tmap; for (int i = 0; i < T.size(); i++) { delete T[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } outputTypes.clear(); - m->Groups.clear(); + m->clearGroups(); return 0; } @@ -277,8 +277,10 @@ int UnifracWeightedCommand::execute() { util = new SharedUtil(); string s; //to make work with setgroups - util->setGroups(m->Groups, tmap->namesOfGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze - util->getCombos(groupComb, m->Groups, numComp); + vector Groups = m->getGroups(); + vector nameGroups = tmap->getNamesOfGroups(); + util->setGroups(Groups, nameGroups, s, numGroups, "weighted"); //sets the groups the user wants to analyze + util->getCombos(groupComb, Groups, numComp); delete util; weighted = new Weighted(tmap, includeRoot); @@ -327,7 +329,7 @@ int UnifracWeightedCommand::execute() { vector< vector > namesOfGroupCombos; for (int a=0; a groups; groups.push_back(m->Groups[a]); groups.push_back(m->Groups[l]); + vector groups; groups.push_back((m->getGroups())[a]); groups.push_back((m->getGroups())[l]); namesOfGroupCombos.push_back(groups); } } @@ -409,7 +411,7 @@ int UnifracWeightedCommand::execute() { if (phylip) { createPhylipFile(); } //clear out users groups - m->Groups.clear(); + m->clearGroups(); delete tmap; delete weighted; for (int i = 0; i < T.size(); i++) { delete T[i]; } @@ -639,17 +641,17 @@ void UnifracWeightedCommand::createPhylipFile() { if ((outputForm == "lt") || (outputForm == "square")) { //output numSeqs - out << m->Groups.size() << endl; + out << m->getNumGroups() << endl; } //make matrix with scores in it - vector< vector > dists; dists.resize(m->Groups.size()); - for (int i = 0; i < m->Groups.size(); i++) { - dists[i].resize(m->Groups.size(), 0.0); + vector< vector > dists; dists.resize(m->getNumGroups()); + for (int i = 0; i < m->getNumGroups(); i++) { + dists[i].resize(m->getNumGroups(), 0.0); } //flip it so you can print it - for (int r=0; rGroups.size(); r++) { + for (int r=0; rgetNumGroups(); r++) { for (int l = 0; l < r; l++) { dists[r][l] = utreeScores[count]; dists[l][r] = utreeScores[count]; @@ -658,9 +660,9 @@ void UnifracWeightedCommand::createPhylipFile() { } //output to file - for (int r=0; rGroups.size(); r++) { + for (int r=0; rgetNumGroups(); r++) { //output name - string name = m->Groups[r]; + string name = (m->getGroups())[r]; if (name.length() < 10) { //pad with spaces to make compatible while (name.length() < 10) { name += " "; } } @@ -675,12 +677,12 @@ void UnifracWeightedCommand::createPhylipFile() { out << name << '\t'; //output distances - for (int l = 0; l < m->Groups.size(); l++) { out << dists[r][l] << '\t'; } + for (int l = 0; l < m->getNumGroups(); l++) { out << dists[r][l] << '\t'; } out << endl; }else{ //output distances for (int l = 0; l < r; l++) { - string otherName = m->Groups[l]; + string otherName = (m->getGroups())[l]; if (otherName.length() < 10) { //pad with spaces to make compatible while (otherName.length() < 10) { otherName += " "; } } diff --git a/unweighted.cpp b/unweighted.cpp index 98bfa45..431323f 100644 --- a/unweighted.cpp +++ b/unweighted.cpp @@ -17,7 +17,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { outputDir = o; //if the users enters no groups then give them the score of all groups - int numGroups = m->Groups.size(); + int numGroups = m->getNumGroups(); //calculate number of comparsions int numComp = 0; @@ -25,7 +25,7 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { for (int r=0; r groups; groups.push_back(m->Groups[r]); groups.push_back(m->Groups[l]); + vector groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]); namesOfGroupCombos.push_back(groups); } } @@ -34,15 +34,15 @@ EstOutput Unweighted::getValues(Tree* t, int p, string o) { vector groups; if (numGroups == 0) { //get score for all users groups - for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - if (tmap->namesOfGroups[i] != "xxx") { - groups.push_back(tmap->namesOfGroups[i]); + for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { + if ((tmap->getNamesOfGroups())[i] != "xxx") { + groups.push_back((tmap->getNamesOfGroups())[i]); } } namesOfGroupCombos.push_back(groups); }else { - for (int i = 0; i < m->Groups.size(); i++) { - groups.push_back(m->Groups[i]); + for (int i = 0; i < m->getNumGroups(); i++) { + groups.push_back((m->getGroups())[i]); } namesOfGroupCombos.push_back(groups); } @@ -260,7 +260,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st outputDir = o; //if the users enters no groups then give them the score of all groups - int numGroups = m->Groups.size(); + int numGroups = m->getNumGroups(); //calculate number of comparsions int numComp = 0; @@ -268,7 +268,7 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st for (int r=0; r groups; groups.push_back(m->Groups[r]); groups.push_back(m->Groups[l]); + vector groups; groups.push_back((m->getGroups())[r]); groups.push_back((m->getGroups())[l]); namesOfGroupCombos.push_back(groups); } } @@ -277,15 +277,15 @@ EstOutput Unweighted::getValues(Tree* t, string groupA, string groupB, int p, st vector groups; if (numGroups == 0) { //get score for all users groups - for (int i = 0; i < tmap->namesOfGroups.size(); i++) { - if (tmap->namesOfGroups[i] != "xxx") { - groups.push_back(tmap->namesOfGroups[i]); + for (int i = 0; i < (tmap->getNamesOfGroups()).size(); i++) { + if ((tmap->getNamesOfGroups())[i] != "xxx") { + groups.push_back((tmap->getNamesOfGroups())[i]); } } namesOfGroupCombos.push_back(groups); }else { - for (int i = 0; i < m->Groups.size(); i++) { - groups.push_back(m->Groups[i]); + for (int i = 0; i < m->getNumGroups(); i++) { + groups.push_back((m->getGroups())[i]); } namesOfGroupCombos.push_back(groups); } diff --git a/venncommand.cpp b/venncommand.cpp index 7ee0489..5a5b9b3 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -168,7 +168,7 @@ VennCommand::VennCommand(string option) { if (groups == "not found") { groups = ""; } else { m->splitAtDash(groups, Groups); - m->Groups = Groups; + m->setGroups(Groups); } calc = validParameter.validFile(parameters, "calc", false); @@ -274,7 +274,7 @@ int VennCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } - m->Groups.clear(); delete venn; delete input; + m->clearGroups(); delete venn; delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -357,7 +357,7 @@ int VennCommand::execute(){ if (m->control_pressed) { for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } - m->Groups.clear(); delete venn; delete input; + m->clearGroups(); delete venn; delete input; for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; } @@ -414,10 +414,10 @@ int VennCommand::execute(){ //reset groups parameter - m->Groups.clear(); + m->clearGroups(); if (m->control_pressed) { - m->Groups.clear(); delete venn; delete input; + m->clearGroups(); delete venn; delete input; for (int i = 0; i < vennCalculators.size(); i++) { delete vennCalculators[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; diff --git a/weighted.cpp b/weighted.cpp index 70f6ae3..d8a4b85 100644 --- a/weighted.cpp +++ b/weighted.cpp @@ -19,7 +19,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { processors = p; outputDir = o; - numGroups = m->Groups.size(); + numGroups = m->getNumGroups(); if (m->control_pressed) { return data; } @@ -29,7 +29,7 @@ EstOutput Weighted::getValues(Tree* t, int p, string o) { for (int l = 0; l < i; l++) { //initialize weighted scores //WScore[globaldata->Groups[i]+globaldata->Groups[l]] = 0.0; - vector groups; groups.push_back(m->Groups[i]); groups.push_back(m->Groups[l]); + vector groups; groups.push_back((m->getGroups())[i]); groups.push_back((m->getGroups())[l]); namesOfGroupCombos.push_back(groups); } } -- 2.39.2