From: westcott Date: Thu, 23 Sep 2010 15:31:47 +0000 (+0000) Subject: added cluster.fragments command as well as the nseqs parameter to the venn command X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=61798fe609675abfedf511e542cc48c56a531199 added cluster.fragments command as well as the nseqs parameter to the venn command --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index cd981c3..2721d08 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -49,6 +49,8 @@ A747E79C1163442A00FB9042 /* chimeracheckcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeracheckcommand.cpp; sourceTree = ""; }; A747E81C116365E000FB9042 /* chimeraslayercommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimeraslayercommand.h; sourceTree = ""; }; A747E81D116365E000FB9042 /* chimeraslayercommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimeraslayercommand.cpp; sourceTree = ""; }; + A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clusterfragmentscommand.h; sourceTree = ""; }; + A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clusterfragmentscommand.cpp; sourceTree = ""; }; A7639F8D1175DF35008F5578 /* makefile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.make; path = makefile; sourceTree = ""; }; A768D95D1248FEAA008AB1D0 /* mothur */ = {isa = PBXFileReference; lastKnownFileType = "compiled.mach-o.executable"; path = mothur; sourceTree = ""; }; A76AAD02117F322B003D8DA1 /* phylosummary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = phylosummary.h; sourceTree = ""; }; @@ -748,6 +750,8 @@ A7D215C911996C6E00F13F13 /* clearcutcommand.cpp */, A7DA2021113FECD400BF472F /* clustercommand.cpp */, A7DA2022113FECD400BF472F /* clustercommand.h */, + A74A9C03124B72DB000D5D25 /* clusterfragmentscommand.h */, + A74A9C04124B72DB000D5D25 /* clusterfragmentscommand.cpp */, A71D924311AEB42400D00CBC /* clustersplitcommand.h */, A71D924211AEB42400D00CBC /* clustersplitcommand.cpp */, A7DA2026113FECD400BF472F /* collectcommand.h */, diff --git a/clustercommand.cpp b/clustercommand.cpp index 6df5faa..2f9c36e 100644 --- a/clustercommand.cpp +++ b/clustercommand.cpp @@ -158,7 +158,7 @@ int ClusterCommand::execute(){ double saveCutoff = cutoff; while (matrix->getSmallDist() < cutoff && matrix->getNNodes() > 0){ - + cout << matrix->getSmallDist() << '\t' << cutoff << '\t' << matrix->getNNodes() << endl; if (m->control_pressed) { //clean up delete globaldata->gSparseMatrix; globaldata->gSparseMatrix = NULL; delete globaldata->gListVector; globaldata->gListVector = NULL; diff --git a/clusterfragmentscommand.cpp b/clusterfragmentscommand.cpp new file mode 100644 index 0000000..0c4dec3 --- /dev/null +++ b/clusterfragmentscommand.cpp @@ -0,0 +1,296 @@ +/* + * ryanscommand.cpp + * Mothur + * + * Created by westcott on 9/23/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + +#include "clusterfragmentscommand.h" + +//********************************************************************************************************************** +//sort by unaligned +inline bool comparePriority(seqRNode first, seqRNode second) { + bool better = false; + + if (first.length > second.length) { + better = true; + }else if (first.length == second.length) { + if (first.numIdentical > second.numIdentical) { + better = true; + } + } + + return better; +} +//********************************************************************************************************************** +ClusterFragmentsCommand::ClusterFragmentsCommand(string option) { + try { + abort = false; + + //allow user to run help + if(option == "help") { help(); abort = true; } + + else { + //valid paramters for this command + string Array[] = {"fasta","name","outputdir","inputdir"}; + vector myArray (Array, Array+(sizeof(Array)/sizeof(string))); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (map::iterator it2 = parameters.begin(); it2 != parameters.end(); it2++) { + if (validParameter.isValidParameter(it2->first, myArray, it2->second) != true) { abort = true; } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("fasta"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["fasta"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + } + + //check for required parameters + fastafile = validParameter.validFile(parameters, "fasta", true); + if (fastafile == "not found") { m->mothurOut("fasta is a required parameter for the cluster.fragments command."); m->mothurOutEndLine(); abort = true; } + else if (fastafile == "not open") { abort = true; } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ outputDir = m->hasPath(fastafile); } + + //check for optional parameter and set defaults + // ...at some point should added some additional type checking... + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not found") { namefile = ""; } + else if (namefile == "not open") { abort = true; } + else { readNameFile(); } + } + + } + catch(exception& e) { + m->errorOut(e, "ClusterFragmentsCommand", "ClusterFragmentsCommand"); + exit(1); + } +} + +//********************************************************************************************************************** +ClusterFragmentsCommand::~ClusterFragmentsCommand(){} +//********************************************************************************************************************** +void ClusterFragmentsCommand::help(){ + try { + m->mothurOut("The cluster.fragments command groups sequences that are part of a larger sequence.\n"); + m->mothurOut("The cluster.fragments command outputs a new fasta and name file.\n"); + m->mothurOut("The cluster.fragments command parameters are fasta and name. The fasta parameter is required. \n"); + m->mothurOut("The names parameter allows you to give a list of seqs that are identical. This file is 2 columns, first column is name or representative sequence, second column is a list of its identical sequences separated by commas.\n"); + m->mothurOut("The cluster.fragments command should be in the following format: \n"); + m->mothurOut("cluster.fragments(fasta=yourFastaFile, names=yourNamesFile) \n"); + m->mothurOut("Example cluster.fragments(fasta=amazon.fasta).\n"); + m->mothurOut("Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n\n"); + } + catch(exception& e) { + m->errorOut(e, "ClusterFragmentsCommand", "help"); + exit(1); + } +} +//********************************************************************************************************************** +int ClusterFragmentsCommand::execute(){ + try { + + if (abort == true) { return 0; } + + int start = time(NULL); + + //reads fasta file and return number of seqs + int numSeqs = readFASTA(); //fills alignSeqs and makes all seqs active + + if (m->control_pressed) { return 0; } + + if (numSeqs == 0) { m->mothurOut("Error reading fasta file...please correct."); m->mothurOutEndLine(); return 0; } + + //sort seqs by length of unaligned sequence + sort(alignSeqs.begin(), alignSeqs.end(), comparePriority); + + int count = 0; + + //think about running through twice... + for (int i = 0; i < numSeqs; i++) { + + if (alignSeqs[i].active) { //this sequence has not been merged yet + + string iBases = alignSeqs[i].seq.getUnaligned(); + + //try to merge it with all smaller seqs + for (int j = i+1; j < numSeqs; j++) { + + if (m->control_pressed) { return 0; } + + if (alignSeqs[j].active) { //this sequence has not been merged yet + + string jBases = alignSeqs[j].seq.getUnaligned(); + + int pos = iBases.find(jBases); + + if (pos != string::npos) { + //merge + alignSeqs[i].names += ',' + alignSeqs[j].names; + alignSeqs[i].numIdentical += alignSeqs[j].numIdentical; + + alignSeqs[j].active = 0; + alignSeqs[j].numIdentical = 0; + count++; + } + }//end if j active + }//end if i != j + + //remove from active list + alignSeqs[i].active = 0; + + }//end if active i + if(i % 100 == 0) { m->mothurOut(toString(i) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } + } + + if(numSeqs % 100 != 0) { m->mothurOut(toString(numSeqs) + "\t" + toString(numSeqs - count) + "\t" + toString(count)); m->mothurOutEndLine(); } + + + string fileroot = outputDir + m->getRootName(m->getSimpleName(fastafile)); + + string newFastaFile = fileroot + "fragclust.fasta"; + string newNamesFile = fileroot + "names"; + + if (m->control_pressed) { return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Total number of sequences before cluster.fragments was " + toString(alignSeqs.size()) + "."); m->mothurOutEndLine(); + m->mothurOut("cluster.fragments removed " + toString(count) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + + printData(newFastaFile, newNamesFile); + + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to cluster " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); + + if (m->control_pressed) { remove(newFastaFile.c_str()); remove(newNamesFile.c_str()); return 0; } + + m->mothurOutEndLine(); + m->mothurOut("Output File Names: "); m->mothurOutEndLine(); + m->mothurOut(newFastaFile); m->mothurOutEndLine(); + m->mothurOut(newNamesFile); m->mothurOutEndLine(); + m->mothurOutEndLine(); + + return 0; + + } + catch(exception& e) { + m->errorOut(e, "ClusterFragmentsCommand", "execute"); + exit(1); + } +} + +/**************************************************************************************************/ +int ClusterFragmentsCommand::readFASTA(){ + try { + + ifstream inFasta; + m->openInputFile(fastafile, inFasta); + + while (!inFasta.eof()) { + + if (m->control_pressed) { inFasta.close(); return 0; } + + Sequence seq(inFasta); m->gobble(inFasta); + + if (seq.getName() != "") { //can get "" if commented line is at end of fasta file + if (namefile != "") { + itSize = sizes.find(seq.getName()); + + if (itSize == sizes.end()) { m->mothurOut(seq.getName() + " is not in your names file, please correct."); m->mothurOutEndLine(); exit(1); } + else{ + seqRNode tempNode(itSize->second, seq, names[seq.getName()], seq.getUnaligned().length()); + alignSeqs.push_back(tempNode); + } + }else { //no names file, you are identical to yourself + seqRNode tempNode(1, seq, seq.getName(), seq.getUnaligned().length()); + alignSeqs.push_back(tempNode); + } + } + } + + inFasta.close(); + return alignSeqs.size(); + } + + catch(exception& e) { + m->errorOut(e, "ClusterFragmentsCommand", "readFASTA"); + exit(1); + } +} +/**************************************************************************************************/ +void ClusterFragmentsCommand::printData(string newfasta, string newname){ + try { + ofstream outFasta; + ofstream outNames; + + m->openOutputFile(newfasta, outFasta); + m->openOutputFile(newname, outNames); + + for (int i = 0; i < alignSeqs.size(); i++) { + if (alignSeqs[i].numIdentical != 0) { + alignSeqs[i].seq.printSequence(outFasta); + outNames << alignSeqs[i].seq.getName() << '\t' << alignSeqs[i].names << endl; + } + } + + outFasta.close(); + outNames.close(); + } + catch(exception& e) { + m->errorOut(e, "ClusterFragmentsCommand", "printData"); + exit(1); + } +} +/**************************************************************************************************/ + +void ClusterFragmentsCommand::readNameFile(){ + try { + ifstream in; + m->openInputFile(namefile, in); + string firstCol, secondCol; + + while (!in.eof()) { + in >> firstCol >> secondCol; m->gobble(in); + names[firstCol] = secondCol; + int size = 1; + + for(int i=0;ierrorOut(e, "ClusterFragmentsCommand", "readNameFile"); + exit(1); + } +} + +/**************************************************************************************************/ + diff --git a/clusterfragmentscommand.h b/clusterfragmentscommand.h new file mode 100644 index 0000000..9bdbc44 --- /dev/null +++ b/clusterfragmentscommand.h @@ -0,0 +1,55 @@ +#ifndef CLUSTERFRAGMENTSCOMMAND_H +#define CLUSTERFRAGMENTSCOMMAND_H + +/* + * clusterfragmentscommand.h + * Mothur + * + * Created by westcott on 9/23/10. + * Copyright 2010 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" +#include "sequence.hpp" + +/************************************************************/ +struct seqRNode { + int numIdentical; + int length; + Sequence seq; + string names; + bool active; + seqRNode() {} + seqRNode(int n, Sequence s, string nm, int l) : numIdentical(n), seq(s), names(nm), active(1), length(l) {} + ~seqRNode() {} +}; +/************************************************************/ + +class ClusterFragmentsCommand : public Command { + +public: + ClusterFragmentsCommand(string); + ~ClusterFragmentsCommand(); + int execute(); + void help(); + +private: + bool abort; + string fastafile, namefile, outputDir; + vector alignSeqs; + map names; //represents the names file first column maps to second column + map sizes; //this map a seq name to the number of identical seqs in the names file + map::iterator itSize; + + int readFASTA(); + void readNameFile(); + void printData(string, string); //fasta filename, names file name + +}; + +/************************************************************/ + +#endif + diff --git a/commandfactory.cpp b/commandfactory.cpp index 45f8906..4651a07 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -88,6 +88,7 @@ #include "normalizesharedcommand.h" #include "metastatscommand.h" #include "splitgroupscommand.h" +#include "clusterfragmentscommand.h" /*******************************************************/ @@ -180,6 +181,7 @@ CommandFactory::CommandFactory(){ commands["normalize.shared"] = "normalize.shared"; commands["metastats"] = "metastats"; commands["split.groups"] = "split.groups"; + commands["cluster.fragments"] = "cluster.fragments"; commands["classify.seqs"] = "MPIEnabled"; commands["dist.seqs"] = "MPIEnabled"; commands["filter.seqs"] = "MPIEnabled"; @@ -315,6 +317,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "normalize.shared") { command = new NormalizeSharedCommand(optionString); } else if(commandName == "metastats") { command = new MetaStatsCommand(optionString); } else if(commandName == "split.groups") { command = new SplitGroupCommand(optionString); } + else if(commandName == "cluster.fragments") { command = new ClusterFragmentsCommand(optionString); } else { command = new NoCommand(optionString); } return command; diff --git a/kmerdb.cpp b/kmerdb.cpp index 2926593..108b207 100644 --- a/kmerdb.cpp +++ b/kmerdb.cpp @@ -110,7 +110,7 @@ void KmerDB::generateDB(){ m->openOutputFile(kmerDBName, kmerFile); // to a file //output version - kmerFile << m->getVersion() << endl; + kmerFile << "#" << m->getVersion() << endl; for(int i=0;i Venn::getPic(SAbundVector* sabund, vector vCalcs) { outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; } + if (nseqs) { + outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; + } + outsvg << "\n\n"; outsvg.close(); } @@ -102,8 +106,6 @@ vector Venn::getPic(vector lookup, vectorgetName() == "sharedace") { singleCalc = new Ace(10); - }else if (vCalcs[i]->getName() == "nseqs") { - singleCalc = new NSeqs(); } vector data = singleCalc->getValues(sabund); @@ -122,7 +124,11 @@ vector Venn::getPic(vector lookup, vectorThe lower bound of the confidence interval is " + toString(data[1]) + "\n"; outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; } - + + if (nseqs) { + outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; + } + outsvg << "\n\n"; outsvg.close(); delete singleCalc; @@ -151,63 +157,71 @@ vector Venn::getPic(vector lookup, vectorcontrol_pressed) { outsvg.close(); return outputNames; } //get estimates for sharedAB - vector results = vCalcs[i]->getValues(subset); + vector shared = vCalcs[i]->getValues(subset); //in essence you want to run it like a single if (vCalcs[i]->getName() == "sharedsobs") { singleCalc = new Sobs(); }else if (vCalcs[i]->getName() == "sharedchao") { singleCalc = new Chao1(); - }else if (vCalcs[i]->getName() == "nseqs") { - singleCalc = new NSeqs(); }//else if (vCalcs[i]->getName() == "sharedace") { //singleCalc = new Ace(10); //} + int sharedVal, numSeqsA, numSeqsB; + if (nseqs) { + NSeqs* nseqsCalc = new NSeqs(); + vector data = nseqsCalc->getValues(lookup); + + sharedVal = data[0] + data[1]; + numSeqsA = sabundA->getNumSeqs(); + numSeqsB = sabundB->getNumSeqs(); + + delete nseqsCalc; + } + + //get estimates for numA - vector resultsA = singleCalc->getValues(sabundA); + vector numA = singleCalc->getValues(sabundA); //get estimates for numB - vector resultsB = singleCalc->getValues(sabundB); - - double numA, numB, shared; - if (vCalcs[i]->getName() == "nseqs") { - shared = results[0] + results[1]; //add both groups sequences - numA = resultsA[0] - results[0]; //what's in A - the number of species in A that are in shared otus - numB = resultsB[0] - results[1]; //what's in B - the number of species in B that are in shared otus - }else{ - shared = results[0]; - numA = resultsA[0] - shared; - numB = resultsB[0] - shared; - } + vector numB = singleCalc->getValues(sabundB); //image window outsvg << "\n"; outsvg << "\n"; - + //draw circles outsvg << ""; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; - outsvg << "" + toString(numA) + "\n"; - outsvg << "" + toString(numB) + "\n"; + outsvg << "" + toString(numA[0] - shared[0]) + "\n"; + outsvg << "" + toString(numB[0] - shared[0]) + "\n"; outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[0]->getGroup() + "\n"; outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(shared) + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(resultsA[0]); - if (resultsA.size() == 3) { - outsvg << " the lci is " + toString(resultsA[1]) + " and the hci is " + toString(resultsA[2]) + "\n"; - }else { outsvg << "\n"; } + outsvg << "" + toString(shared[0]) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + if (numA.size() == 3) { + outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } + outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(resultsB[0]); - if (resultsB.size() == 3) { - outsvg << " the lci is " + toString(resultsB[1]) + " and the hci is " + toString(resultsB[2]) + "\n"; - }else { outsvg << "\n"; } - - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared) + "\n"; - outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared / (float)(numA + numB + shared))*100) + "\n"; - outsvg << "The total richness for all groups is " + toString((float)(numA + numB + shared)) + "\n"; + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + if (numB.size() == 3) { + outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } + outsvg << "\n"; + + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal); } + outsvg << "\n"; + + outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))*100) + "\n"; + outsvg << "The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0]))+ "\n";; + //close file outsvg << "\n\n"; @@ -219,6 +233,8 @@ vector Venn::getPic(vector lookup, vector Venn::getPic(vector lookup, vectorcontrol_pressed) { outsvg.close(); return outputNames; } + int sharedVal, sharedABVal, sharedACVal, sharedBCVal, numSeqsA, numSeqsB, numSeqsC; + + if (nseqs) { + NSeqs* nseqsCalc = new NSeqs(); + vector sharedData = nseqsCalc->getValues(lookup); + + vector mysubset; mysubset.push_back(lookup[0]); mysubset.push_back(lookup[1]); + vector sharedAB = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[2]); + vector sharedAC = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[2]); + vector sharedBC = nseqsCalc->getValues(mysubset); + + sharedVal = sharedData[0] + sharedData[1] + sharedData[2]; + sharedABVal = sharedAB[0] + sharedAB[1]; + sharedACVal = sharedAC[0] + sharedAC[1]; + sharedBCVal = sharedBC[0] + sharedBC[1]; + numSeqsA = sabundA->getNumSeqs(); + numSeqsB = sabundB->getNumSeqs(); + numSeqsC = sabundC->getNumSeqs(); + + delete nseqsCalc; + } + + if (vCalcs[i]->getName() == "sharedace") { singleCalc = new Ace(10); @@ -270,8 +313,7 @@ vector Venn::getPic(vector lookup, vector sharedCwithAB; //find possible sharedABC values - double sharedABC1 = 0.0; double sharedABC2 = 0.0; double sharedABC3 = 0.0; double sharedABC = 0.0; - vector = resultsNseqs; + float sharedABC1 = 0.0; float sharedABC2 = 0.0; float sharedABC3 = 0.0; float sharedABC = 0.0; if (vCalcs[i]->getMultiple() == false) { //merge BC and estimate with shared with A @@ -322,74 +364,72 @@ vector Venn::getPic(vector lookup, vector data = vCalcs[i]->getValues(lookup); - resultsNseqs = data; sharedABC = data[0]; sharedAwithBC.push_back(sharedAB[0] + sharedAC[0] - sharedABC); sharedBwithAC.push_back(sharedAB[0] + sharedBC[0] - sharedABC); sharedCwithAB.push_back(sharedAC[0] + sharedBC[0] - sharedABC); } - double numA, numB, numC, numAB, numAC, numBC, numABC; - if (vCalcs[i]->getName() == "nseqs") { - numABC = resultsNseqs[0] + resultsNseqs[1] + resultsNseqs[2]; - numBC = sharedBC[0] + sharedBC[1] - (resultsNseqs[1] + resultsNseqs[2]); - - }else{ - numABC = sharedABC; - numBC = sharedBC[0] - sharedABC; - numAB = sharedAB[0] - sharedABC; - numAC = sharedAC[0] - sharedABC; - numA = numA[0]-sharedAwithBC[0]; - numB = numB[0]-sharedBwithAC[0]; - numC = numC[0]-sharedCwithAB[0]; - } - //image window - outsvg << "\n"; + outsvg << "\n"; outsvg << "\n"; //draw circles outsvg << ""; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; outsvg << ""; //place labels within overlaps - outsvg << "" + toString(numA[0]-sharedAwithBC[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(sharedAB[0] - sharedABC) + "\n"; - outsvg << "" + toString(numB[0]-sharedBwithAC[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(numA[0]-sharedAwithBC[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(sharedAB[0] - sharedABC) + "\n"; + outsvg << "" + toString(numB[0]-sharedBwithAC[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; outsvg << "" + toString(sharedAC[0] - sharedABC) + "\n"; outsvg << "" + toString(numC[0]-sharedCwithAB[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.52 * height)) + "\">" + lookup[2]->getGroup() + "\n"; - outsvg << "" + toString(sharedBC[0] - sharedABC) + "\n"; - outsvg << "" + toString(sharedABC) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.52 * height)) + "\">" + lookup[2]->getGroup() + "\n"; + outsvg << "" + toString(sharedBC[0] - sharedABC) + "\n"; + outsvg << "" + toString(sharedABC) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "\n"; + outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { - outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "\n"; - }else { outsvg << "\n"; } + outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } + outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { - outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "\n"; - }else { outsvg << "\n"; } + outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } + outsvg << "\n"; - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); if (numC.size() == 3) { - outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "\n"; - }else { outsvg << "\n"; } + outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC); } + outsvg << "\n"; - outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "\n"; - outsvg << "The total shared richness is " + toString(sharedABC) + "\n"; + outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "\n"; + outsvg << "The total shared richness is " + toString(sharedABC); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal); } + outsvg << "\n"; delete singleCalc; @@ -428,52 +468,65 @@ vector Venn::getPic(vector lookup, vector sharedabc = vCalcs[i]->getValues(subset); //image window - outsvg << "\n"; + outsvg << "\n"; outsvg << "\n"; //draw circles outsvg << ""; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; outsvg << ""; //place labels within overlaps - outsvg << "" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(sharedab[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(sharedac[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.51 * height)) + "\">" + lookup[2]->getGroup() + "\n"; - outsvg << "" + toString(sharedbc[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(sharedabc[0]) + "\n"; + outsvg << "" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(sharedab[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(sharedac[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.51 * height)) + "\">" + lookup[2]->getGroup() + "\n"; + outsvg << "" + toString(sharedbc[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(sharedabc[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]) + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } + outsvg << "\n"; + + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { - outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]) + "\n"; - }else { outsvg << "\n"; } + outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } + outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { - outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]) + "\n"; - }else { outsvg << "\n"; } - - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); + outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } + outsvg << "\n"; + + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); if (numC.size() == 3) { - outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]) + "\n"; - }else { outsvg << "\n"; } - - outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "\n"; - outsvg << "The total shared richness is " + toString(sharedabc[0]) + "\n"; + outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]); + } + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC); } + outsvg << "\n"; + outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "\n"; + outsvg << "The total shared richness is " + toString(sharedabc[0]); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal); } + outsvg << "\n"; } - //close file outsvg << "\n\n"; @@ -487,6 +540,8 @@ vector Venn::getPic(vector lookup, vector Venn::getPic(vector lookup, vectorgetName() != "sharedsobs") && (vCalcs[i]->getName() != "sharedchao") && (vCalcs[i]->getName() != "nseqs")) { m->mothurOut(vCalcs[i]->getName() + " is not a valid calculator with four groups. It will be disregarded. "); m->mothurOutEndLine(); } + if ((vCalcs[i]->getName() != "sharedsobs") && (vCalcs[i]->getName() != "sharedchao")) { m->mothurOut(vCalcs[i]->getName() + " is not a valid calculator with four groups. It will be disregarded. "); m->mothurOutEndLine(); } else{ string filenamesvg = outputDir + m->getRootName(m->getSimpleName(globaldata->inputFileName)) + lookup[0]->getLabel() + "." + vCalcs[i]->getName() + "." + lookup[0]->getGroup() + "-" + lookup[1]->getGroup() + "-" + lookup[2]->getGroup() + "-" + lookup[3]->getGroup() + ".svg"; outputNames.push_back(filenamesvg); @@ -521,10 +576,7 @@ vector Venn::getPic(vector lookup, vectorgetName() == "sharedchao") { singleCalc = new Chao1(); - }else if (vCalcs[i]->getName() == "nseqs") { - singleCalc = new NSeqs(); } - //get estimates for numA data = singleCalc->getValues(sabundA); @@ -601,31 +653,112 @@ vector Venn::getPic(vector lookup, vectorgetValues(lookup); sharedABCD = data[0]; //cout << "num abcd = " << sharedABCD << endl << endl; - - + int sharedVal, sharedABCVal, sharedABDVal, sharedACDVal, sharedBCDVal, sharedABVal, sharedACVal, sharedADVal, sharedBCVal, sharedBDVal, sharedCDVal, numSeqsA, numSeqsB, numSeqsC, numSeqsD; + + if (nseqs) { + NSeqs* nseqsCalc = new NSeqs(); + vector sharedData = nseqsCalc->getValues(lookup); + + vector mysubset; mysubset.push_back(lookup[0]); mysubset.push_back(lookup[1]); + vector sharedAB = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[2]); + vector sharedAC = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[3]); + vector sharedAD = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[2]); + vector sharedBC = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[3]); + vector sharedBD = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[2]); mysubset.push_back(lookup[3]); + vector sharedCD = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[2]); + vector sharedABC = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[3]); + vector sharedABD = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[0]); mysubset.push_back(lookup[2]); mysubset.push_back(lookup[3]); + vector sharedACD = nseqsCalc->getValues(mysubset); + + mysubset.clear(); mysubset.push_back(lookup[1]); mysubset.push_back(lookup[2]); mysubset.push_back(lookup[3]); + vector sharedBCD = nseqsCalc->getValues(mysubset); + + sharedVal = sharedData[0] + sharedData[1] + sharedData[2] + sharedData[3]; + sharedABCVal = sharedABC[0] + sharedABC[1] + sharedABC[2]; + sharedABDVal = sharedABD[0] + sharedABD[1] + sharedABD[2]; + sharedACDVal = sharedACD[0] + sharedACD[1] + sharedACD[2]; + sharedBCDVal = sharedBCD[0] + sharedBCD[1] + sharedBCD[2]; + sharedABVal = sharedAB[0] + sharedAB[1]; + sharedACVal = sharedAC[0] + sharedAC[1]; + sharedADVal = sharedAD[0] + sharedAD[1]; + sharedBCVal = sharedBC[0] + sharedBC[1]; + sharedBDVal = sharedBD[0] + sharedBD[1]; + sharedCDVal = sharedCD[0] + sharedCD[1]; + numSeqsA = sabundA->getNumSeqs(); + numSeqsB = sabundB->getNumSeqs(); + numSeqsC = sabundC->getNumSeqs(); + numSeqsD = sabundD->getNumSeqs(); + + delete nseqsCalc; + } + //image window - outsvg << "\n"; + outsvg << "\n"; outsvg << "\n"; - outsvg << ""; + outsvg << ""; outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA) + "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB) + "\n"; - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC) + "\n"; - outsvg << "The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA); + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } + outsvg << "\n"; + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB); + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } + outsvg << "\n"; + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC); + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC); } + outsvg << "\n"; + outsvg << "The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsD); } + outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD) + "\n"; - outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD) + "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedADVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBDVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedCDVal); } + outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD) + "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABCVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABDVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACDVal); } + outsvg << "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD); + if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCDVal); } + outsvg << "\n"; //make adjustments sharedABC = sharedABC - sharedABCD; @@ -682,11 +815,10 @@ vector Venn::getPic(vector lookup, vector" + toString(sharedABC) + "\n"; outsvg << "" + toString(sharedABCD) + "\n"; + outsvg << "The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)); + if (nseqs) { outsvg << ", and the number of squences in the otus shared by all groups is " + toString(sharedVal); } + outsvg << "\n"; - - outsvg << "The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)) + "\n"; - - outsvg << "\n\n"; outsvg.close(); delete singleCalc; diff --git a/venn.h b/venn.h index d3a3f05..33935e6 100644 --- a/venn.h +++ b/venn.h @@ -20,7 +20,7 @@ class Venn { public: - Venn(string); + Venn(string, bool); ~Venn(){}; vector getPic(SAbundVector*, vector); @@ -32,6 +32,7 @@ private: string groupComb, outputDir; ofstream outsvg; MothurOut* m; + bool nseqs; }; /***********************************************************************/ diff --git a/venncommand.cpp b/venncommand.cpp index 8b617d8..5fa06aa 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -32,7 +32,7 @@ VennCommand::VennCommand(string option) { else { //valid paramters for this command - string AlignArray[] = {"groups","label","calc", "abund","outputdir","inputdir"}; + string AlignArray[] = {"groups","label","calc", "abund","nseqs","outputdir","inputdir"}; vector myArray (AlignArray, AlignArray+(sizeof(AlignArray)/sizeof(string))); OptionParser parser(option); @@ -95,6 +95,10 @@ VennCommand::VennCommand(string option) { string temp; temp = validParameter.validFile(parameters, "abund", false); if (temp == "not found") { temp = "10"; } convert(temp, abund); + + temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; } + nseqs = m->isTrue(temp); + if (abort == false) { validCalculator = new ValidCalculators(); @@ -112,8 +116,6 @@ VennCommand::VennCommand(string option) { if(abund < 5) abund = 10; vennCalculators.push_back(new Ace(abund)); - }else if (Estimators[i] == "nseqs") { - vennCalculators.push_back(new NSeqs()); } } } @@ -126,8 +128,6 @@ VennCommand::VennCommand(string option) { vennCalculators.push_back(new SharedChao1()); }else if (Estimators[i] == "sharedace") { vennCalculators.push_back(new SharedAce()); - }else if (Estimators[i] == "nseqs") { - vennCalculators.push_back(new NSeqs()); } } } @@ -135,7 +135,7 @@ VennCommand::VennCommand(string option) { //if the users entered no valid calculators don't execute command if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); abort = true; } - else { venn = new Venn(outputDir); } + else { venn = new Venn(outputDir, nseqs); } } } @@ -154,7 +154,7 @@ VennCommand::VennCommand(string option) { void VennCommand::help(){ try { m->mothurOut("The venn command can only be executed after a successful read.otu command.\n"); - m->mothurOut("The venn command parameters are groups, calc, abund and label. No parameters are required.\n"); + m->mothurOut("The venn command parameters are groups, calc, abund, nseqs and label. No parameters are required.\n"); m->mothurOut("The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n"); m->mothurOut("The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n"); m->mothurOut("The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n"); @@ -162,7 +162,8 @@ void VennCommand::help(){ m->mothurOut("The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n"); m->mothurOut("The default value for calc is sobs if you have only read a list file or if you have selected only one group, and sharedsobs if you have multiple groups.\n"); m->mothurOut("The default available estimators for calc are sobs, chao and ace if you have only read a list file, and sharedsobs, sharedchao and sharedace if you have read a list and group file or a shared file.\n"); - m->mothurOut("The only estmiator available four 4 groups is sharedsobs.\n"); + m->mothurOut("The nseqs parameter will output the number of sequences represented by the otus in the picture, default=F.\n"); + m->mothurOut("The only estimators available four 4 groups are sharedsobs and sharedchao.\n"); m->mothurOut("The venn command outputs a .svg file for each calculator you specify at each distance you choose.\n"); m->mothurOut("Note: No spaces between parameter labels (i.e. groups), '=' and parameters (i.e.yourGroups).\n\n"); } diff --git a/venncommand.h b/venncommand.h index aa1e856..a159356 100644 --- a/venncommand.h +++ b/venncommand.h @@ -40,7 +40,7 @@ private: SAbundVector* sabund; int abund; - bool abort, allLines; + bool abort, allLines, nseqs; set labels; //holds labels to be used string format, groups, calc, label, outputDir; vector Estimators, Groups;