From: westcott Date: Tue, 29 Nov 2011 17:21:17 +0000 (+0000) Subject: Added summary.qual command. Added fontsize parameter to heatmap.sim and venn commands X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=0718c426add1df69adcc24444ef45550fb262ca7 Added summary.qual command. Added fontsize parameter to heatmap.sim and venn commands --- diff --git a/Mothur.xcodeproj/project.pbxproj b/Mothur.xcodeproj/project.pbxproj index ff6df12..d1cb6d8 100644 --- a/Mothur.xcodeproj/project.pbxproj +++ b/Mothur.xcodeproj/project.pbxproj @@ -41,6 +41,7 @@ A74D369B137DAB8400332B0C /* viterbifast.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3684137DAB8300332B0C /* viterbifast.cpp */; }; A74D369C137DAB8400332B0C /* writechhit.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D3686137DAB8300332B0C /* writechhit.cpp */; }; A74D36B8137DAFAA00332B0C /* chimerauchimecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */; }; + A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A754149614840CF7005850D1 /* summaryqualcommand.cpp */; }; A75790591301749D00A30DAB /* homovacommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A75790581301749D00A30DAB /* homovacommand.cpp */; }; A7730EFF13967241007433A3 /* countseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7730EFE13967241007433A3 /* countseqscommand.cpp */; }; A774101414695AF60098E6AC /* shhhseqscommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A774101314695AF60098E6AC /* shhhseqscommand.cpp */; }; @@ -419,6 +420,8 @@ A74D3686137DAB8300332B0C /* writechhit.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = writechhit.cpp; sourceTree = ""; }; A74D36B6137DAFAA00332B0C /* chimerauchimecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = chimerauchimecommand.h; sourceTree = ""; }; A74D36B7137DAFAA00332B0C /* chimerauchimecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = chimerauchimecommand.cpp; sourceTree = ""; }; + A754149514840CF7005850D1 /* summaryqualcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = summaryqualcommand.h; sourceTree = ""; }; + A754149614840CF7005850D1 /* summaryqualcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = summaryqualcommand.cpp; sourceTree = ""; }; A75790571301749D00A30DAB /* homovacommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = homovacommand.h; sourceTree = ""; }; A75790581301749D00A30DAB /* homovacommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = homovacommand.cpp; sourceTree = ""; }; A7730EFD13967241007433A3 /* countseqscommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = countseqscommand.h; sourceTree = ""; }; @@ -1427,6 +1430,8 @@ A7E9B84F12D37EC400DA6239 /* subsamplecommand.cpp */, A7E9B85812D37EC400DA6239 /* summarycommand.h */, A7E9B85712D37EC400DA6239 /* summarycommand.cpp */, + A754149514840CF7005850D1 /* summaryqualcommand.h */, + A754149614840CF7005850D1 /* summaryqualcommand.cpp */, A7E9B85A12D37EC400DA6239 /* summarysharedcommand.h */, A7E9B85912D37EC400DA6239 /* summarysharedcommand.cpp */, A7FFB556142CA02C004884F2 /* summarytaxcommand.h */, @@ -2185,6 +2190,7 @@ A774101414695AF60098E6AC /* shhhseqscommand.cpp in Sources */, A774104814696F320098E6AC /* myseqdist.cpp in Sources */, A77410F614697C300098E6AC /* seqnoise.cpp in Sources */, + A754149714840CF7005850D1 /* summaryqualcommand.cpp in Sources */, ); runOnlyForDeploymentPostprocessing = 0; }; diff --git a/commandfactory.cpp b/commandfactory.cpp index b49df72..8c8fe3c 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -125,6 +125,7 @@ #include "summarytaxcommand.h" #include "chimeraperseuscommand.h" #include "shhhseqscommand.h" +#include "summaryqualcommand.h" /*******************************************************/ @@ -270,8 +271,8 @@ CommandFactory::CommandFactory(){ commands["sens.spec"] = "sens.spec"; commands["seq.error"] = "seq.error"; commands["summary.tax"] = "summary.tax"; + commands["summary.qual"] = "summary.qual"; commands["shhh.seqs"] = "shhh.seqs"; - commands["quit"] = "MPIEnabled"; } @@ -429,6 +430,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString){ else if(commandName == "count.groups") { command = new CountGroupsCommand(optionString); } else if(commandName == "clear.memory") { command = new ClearMemoryCommand(optionString); } else if(commandName == "summary.tax") { command = new SummaryTaxCommand(optionString); } + else if(commandName == "summary.qual") { command = new SummaryQualCommand(optionString); } else if(commandName == "chimera.perseus") { command = new ChimeraPerseusCommand(optionString); } else if(commandName == "shhh.seqs") { command = new ShhhSeqsCommand(optionString); } else { command = new NoCommand(optionString); } @@ -572,6 +574,7 @@ Command* CommandFactory::getCommand(string commandName, string optionString, str else if(commandName == "count.groups") { pipecommand = new CountGroupsCommand(optionString); } else if(commandName == "clear.memory") { pipecommand = new ClearMemoryCommand(optionString); } else if(commandName == "summary.tax") { pipecommand = new SummaryTaxCommand(optionString); } + else if(commandName == "summary.qual") { pipecommand = new SummaryQualCommand(optionString); } else if(commandName == "chimera.perseus") { pipecommand = new ChimeraPerseusCommand(optionString); } else if(commandName == "shhh.seqs") { pipecommand = new ShhhSeqsCommand(optionString); } else { pipecommand = new NoCommand(optionString); } @@ -703,6 +706,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "count.groups") { shellcommand = new CountGroupsCommand(); } else if(commandName == "clear.memory") { shellcommand = new ClearMemoryCommand(); } else if(commandName == "summary.tax") { shellcommand = new SummaryTaxCommand(); } + else if(commandName == "summary.qual") { shellcommand = new SummaryQualCommand(); } else if(commandName == "chimera.perseus") { shellcommand = new ChimeraPerseusCommand(); } else if(commandName == "shhh.seqs") { shellcommand = new ShhhSeqsCommand(); } else { shellcommand = new NoCommand(); } diff --git a/seqsummarycommand.h b/seqsummarycommand.h index 32947ca..d37d6f4 100644 --- a/seqsummarycommand.h +++ b/seqsummarycommand.h @@ -61,7 +61,7 @@ private: //custom data structure for threads to use. // This is passed by void pointer so it can be any data type // that can be passed using a single void pointer (LPVOID). -typedef struct seqSumData { +struct seqSumData { vector* startPosition; vector* endPosition; vector* seqLength; diff --git a/summaryqualcommand.cpp b/summaryqualcommand.cpp new file mode 100644 index 0000000..0969a12 --- /dev/null +++ b/summaryqualcommand.cpp @@ -0,0 +1,446 @@ +/* + * summaryqualcommand.cpp + * Mothur + * + * Created by westcott on 11/28/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + +#include "summaryqualcommand.h" + + +//********************************************************************************************************************** +vector SummaryQualCommand::setParameters(){ + try { + CommandParameter pqual("qfile", "InputTypes", "", "", "none", "none", "none",false,true); parameters.push_back(pqual); + CommandParameter pname("name", "InputTypes", "", "", "none", "none", "none",false,false); parameters.push_back(pname); + CommandParameter pprocessors("processors", "Number", "", "1", "", "", "",false,false); parameters.push_back(pprocessors); + CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); + CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); + + vector myArray; + for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); } + return myArray; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "setParameters"); + exit(1); + } +} +//********************************************************************************************************************** +string SummaryQualCommand::getHelpString(){ + try { + string helpString = ""; + helpString += "The summary.qual command reads a quality file and an optional name file, and summarizes the quality information.\n"; + helpString += "The summary.tax command parameters are qfile, name and processors. qfile is required, unless you have a valid current quality file.\n"; + helpString += "The name parameter allows you to enter a name file associated with your quality file. \n"; + helpString += "The summary.qual command should be in the following format: \n"; + helpString += "summary.qual(qfile=yourQualityFile) \n"; + helpString += "Note: No spaces between parameter labels (i.e. qfile), '=' and parameters (i.e.yourQualityFile).\n"; + return helpString; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "getHelpString"); + exit(1); + } +} + +//********************************************************************************************************************** +SummaryQualCommand::SummaryQualCommand(){ + try { + abort = true; calledHelp = true; + setParameters(); + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "SummaryQualCommand"); + exit(1); + } +} +//*************************************************************************************************************** + +SummaryQualCommand::SummaryQualCommand(string option) { + try { + abort = false; calledHelp = false; + + //allow user to run help + if(option == "help") { help(); abort = true; calledHelp = true; } + else if(option == "citation") { citation(); abort = true; calledHelp = true;} + + else { + vector myArray = setParameters(); + + OptionParser parser(option); + map parameters = parser.getParameters(); + + ValidParameters validParameter; + map::iterator it; + + //check to make sure all parameters are valid for command + for (it = parameters.begin(); it != parameters.end(); it++) { + if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } + } + + //if the user changes the input directory command factory will send this info to us in the output parameter + string inputDir = validParameter.validFile(parameters, "inputdir", false); + if (inputDir == "not found"){ inputDir = ""; } + else { + string path; + it = parameters.find("qfile"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["qfile"] = inputDir + it->second; } + } + + it = parameters.find("name"); + //user has given a template file + if(it != parameters.end()){ + path = m->hasPath(it->second); + //if the user has not given a path then, add inputdir. else leave path alone. + if (path == "") { parameters["name"] = inputDir + it->second; } + } + } + + //initialize outputTypes + vector tempOutNames; + outputTypes["summary"] = tempOutNames; + + //check for required parameters + qualfile = validParameter.validFile(parameters, "qfile", true); + if (qualfile == "not open") { abort = true; } + else if (qualfile == "not found") { + qualfile = m->getQualFile(); + if (qualfile != "") { m->mothurOut("Using " + qualfile + " as input file for the qfile parameter."); m->mothurOutEndLine(); } + else { m->mothurOut("You have no current quality file and the qfile parameter is required."); m->mothurOutEndLine(); abort = true; } + }else { m->setQualFile(qualfile); } + + namefile = validParameter.validFile(parameters, "name", true); + if (namefile == "not open") { namefile = ""; abort = true; } + else if (namefile == "not found") { namefile = ""; } + else { m->setNameFile(namefile); } + + //if the user changes the output directory command factory will send this info to us in the output parameter + outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){ + outputDir = ""; + outputDir += m->hasPath(qualfile); //if user entered a file with a path then preserve it + } + + string temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); } + m->setProcessors(temp); + convert(temp, processors); + } + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "SummaryQualCommand"); + exit(1); + } +} +//*************************************************************************************************************** +int SummaryQualCommand::execute(){ + try{ + + if (abort == true) { if (calledHelp) { return 0; } return 2; } + + int start = time(NULL); + int numSeqs = 0; + + vector position; + vector averageQ; + vector< vector > scores; + + if (m->control_pressed) { return 0; } + + if (namefile != "") { nameMap = m->readNames(namefile); } + + vector positions; +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + positions = m->divideFile(qualfile, processors); + for (int i = 0; i < (positions.size()-1); i++) { lines.push_back(linePair(positions[i], positions[(i+1)])); } +#else + if (processors == 1) { + lines.push_back(linePair(0, 1000)); + }else { + positions = m->setFilePosFasta(qualfile, numSeqs); + + //figure out how many sequences you have to process + int numSeqsPerProcessor = numSeqs / processors; + for (int i = 0; i < processors; i++) { + int startIndex = i * numSeqsPerProcessor; + if(i == (processors - 1)){ numSeqsPerProcessor = numSeqs - i * numSeqsPerProcessor; } + lines.push_back(linePair(positions[startIndex], numSeqsPerProcessor)); + } + } +#endif + + + if(processors == 1){ numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]); } + else{ numSeqs = createProcessesCreateSummary(position, averageQ, scores, qualfile); } + + if (m->control_pressed) { return 0; } + + //print summary file + string summaryFile = outputDir + m->getRootName(m->getSimpleName(qualfile)) + "qual.summary"; + printQual(summaryFile, position, averageQ, scores); + + if (m->control_pressed) { m->mothurRemove(summaryFile); return 0; } + + //output results to screen + cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint); + m->mothurOutEndLine(); + m->mothurOut("Position\tNumSeqs\tAverageQ"); m->mothurOutEndLine(); + for (int i = 0; i < position.size(); i+=100) { + float average = averageQ[i] / (float) position[i]; + cout << i << '\t' << position[i] << '\t' << average; + m->mothurOutJustToLog(toString(i) + "\t" + toString(position[i]) + "\t" + toString(average)); m->mothurOutEndLine(); + } + + m->mothurOutEndLine(); + m->mothurOut("It took " + toString(time(NULL) - start) + " secs to create the summary file for " + toString(numSeqs) + " sequences."); m->mothurOutEndLine(); m->mothurOutEndLine(); + m->mothurOutEndLine(); + m->mothurOut("Output File Name: "); m->mothurOutEndLine(); + m->mothurOut(summaryFile); m->mothurOutEndLine(); outputNames.push_back(summaryFile); outputTypes["summary"].push_back(summaryFile); + m->mothurOutEndLine(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "execute"); + exit(1); + } +} +/**************************************************************************************/ +int SummaryQualCommand::driverCreateSummary(vector& position, vector& averageQ, vector< vector >& scores, string filename, linePair filePos) { + try { + ifstream in; + m->openInputFile(filename, in); + + in.seekg(filePos.start); + + bool done = false; + int count = 0; + + while (!done) { + + if (m->control_pressed) { in.close(); return 1; } + + QualityScores current(in); m->gobble(in); + + if (current.getName() != "") { + + int num = 1; + if (namefile != "") { + //make sure this sequence is in the namefile, else error + map::iterator it = nameMap.find(current.getName()); + + if (it == nameMap.end()) { m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); m->mothurOutEndLine(); m->control_pressed = true; } + else { num = it->second; } + } + + vector thisScores = current.getQualityScores(); + + //resize to num of positions setting number of seqs with that size to 1 + if (position.size() < thisScores.size()) { position.resize(thisScores.size(), 0); } + if (averageQ.size() < thisScores.size()) { averageQ.resize(thisScores.size(), 0); } + if (scores.size() < thisScores.size()) { + scores.resize(thisScores.size()); + for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } + } + + //increase counts of number of seqs with this position + //average is really the total, we will average in execute + for (int i = 0; i < thisScores.size(); i++) { + position[i] += num; + averageQ[i] += (thisScores[i] * num); //weighting for namesfile + if (thisScores[i] > 40) { m->mothurOut("[ERROR]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40."); m->mothurOutEndLine(); m->control_pressed = true; } + else { scores[i][thisScores[i]] += num; } + } + + count += num; + } + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + unsigned long long pos = in.tellg(); + if ((pos == -1) || (pos >= filePos.end)) { break; } +#else + if (in.eof()) { break; } +#endif + } + + in.close(); + + return count; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "driverCreateSummary"); + exit(1); + } +} +/**************************************************************************************************/ +int SummaryQualCommand::createProcessesCreateSummary(vector& position, vector& averageQ, vector< vector >& scores, string filename) { + try { + int process = 1; + int numSeqs = 0; + processIDS.clear(); + +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) + + //loop through and create all the processes you want + while (process != processors) { + int pid = fork(); + + if (pid > 0) { + processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later + process++; + }else if (pid == 0){ + numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[process]); + + //pass numSeqs to parent + ofstream out; + string tempFile = qualfile + toString(getpid()) + ".num.temp"; + m->openOutputFile(tempFile, out); + + out << numSeqs << endl; + out << position.size() << endl; + for (int k = 0; k < position.size(); k++) { out << position[k] << '\t'; } out << endl; + for (int k = 0; k < averageQ.size(); k++) { out << averageQ[k] << '\t'; } out << endl; + for (int k = 0; k < scores.size(); k++) { + for (int j = 0; j < 41; j++) { + out << scores[k][j] << '\t'; + } + out << endl; + } + out << endl; + + out.close(); + + exit(0); + }else { + m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine(); + for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); } + exit(0); + } + } + + //do your part + numSeqs = driverCreateSummary(position, averageQ, scores, qualfile, lines[0]); + + //force parent to wait until all the processes are done + for (int i=0;iopenInputFile(tempFilename, in); + + int temp, tempNum; + in >> tempNum; m->gobble(in); numSeqs += tempNum; + in >> tempNum; m->gobble(in); + + if (position.size() < tempNum) { position.resize(tempNum, 0); } + if (averageQ.size() < tempNum) { averageQ.resize(tempNum, 0); } + if (scores.size() < tempNum) { + scores.resize(tempNum); + for (int i = 0; i < scores.size(); i++) { scores[i].resize(41, 0); } + } + + for (int k = 0; k < tempNum; k++) { in >> temp; position[k] += temp; } m->gobble(in); + for (int k = 0; k < tempNum; k++) { in >> temp; averageQ[k] += temp; } m->gobble(in); + for (int k = 0; k < tempNum; k++) { + for (int j = 0; j < 41; j++) { + in >> temp; scores[k][j] += temp; + m->gobble(in); + } + } + + in.close(); + m->mothurRemove(tempFilename); + } + +#else + ////////////////////////////////////////////////////////////////////////////////////////////////////// + //Windows version shared memory, so be careful when passing variables through the seqSumQualData struct. + //Above fork() will clone, so memory is separate, but that's not the case with windows, + //Taking advantage of shared memory to allow both threads to add info to vectors. + ////////////////////////////////////////////////////////////////////////////////////////////////////// + + vector pDataArray; + DWORD dwThreadIdArray[processors]; + HANDLE hThreadArray[processors]; + + //Create processor worker threads. + for( int i=0; icount; + CloseHandle(hThreadArray[i]); + delete pDataArray[i]; + } +#endif + return numSeqs; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "createProcessesCreateSummary"); + exit(1); + } +} +/**************************************************************************************************/ +int SummaryQualCommand::printQual(string sumFile, vector& position, vector& averageQ, vector< vector >& scores) { + try { + ofstream out; + m->openOutputFile(sumFile, out); + out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); + outputNames.push_back(sumFile); outputTypes["summary"].push_back(sumFile); + + //print headings + out << "Position\tnumSeqs\tAverageQ\t"; + for (int i = 0; i < 41; i++) { out << "q" << i << '\t'; } + out << endl; + + for (int i = 0; i < position.size(); i++) { + + if (m->control_pressed) { out.close(); return 0; } + + float average = averageQ[i] / (float) position[i]; + out << i << '\t' << position[i] << '\t' << average << '\t'; + + for (int j = 0; j < 41; j++) { + out << scores[i][j] << '\t'; + } + out << endl; + } + + out.close(); + + return 0; + } + catch(exception& e) { + m->errorOut(e, "SummaryQualCommand", "printQual"); + exit(1); + } +} + +/**************************************************************************************/ + + diff --git a/summaryqualcommand.h b/summaryqualcommand.h new file mode 100644 index 0000000..1ec3cf1 --- /dev/null +++ b/summaryqualcommand.h @@ -0,0 +1,163 @@ +#ifndef SUMMARYQUALCOMMAND_H +#define SUMMARYQUALCOMMAND_H + +/* + * summaryqualcommand.h + * Mothur + * + * Created by westcott on 11/28/11. + * Copyright 2011 Schloss Lab. All rights reserved. + * + */ + + +#include "command.hpp" +#include "qualityscores.h" + +/**************************************************************************************************/ + +class SummaryQualCommand : public Command { +public: + SummaryQualCommand(string); + SummaryQualCommand(); + ~SummaryQualCommand(){} + + vector setParameters(); + string getCommandName() { return "summary.qual"; } + string getCommandCategory() { return "Sequence Processing"; } + string getHelpString(); + string getCitation() { return "http://www.mothur.org/wiki/Summary.qual"; } + string getDescription() { return "summarize the quality of a set of sequences"; } + + int execute(); + void help() { m->mothurOut(getHelpString()); } + +private: + bool abort; + string qualfile, outputDir, namefile; + vector outputNames; + map nameMap; + int processors; + + struct linePair { + unsigned long long start; + unsigned long long end; + linePair(unsigned long long i, unsigned long long j) : start(i), end(j) {} + }; + + vector lines; + vector processIDS; + + int createProcessesCreateSummary(vector&, vector&, vector< vector >&, string); + int driverCreateSummary(vector&, vector&, vector< vector >&, string, linePair); + int printQual(string, vector&, vector&, vector< vector >&); +}; + +/**************************************************************************************************/ +//custom data structure for threads to use. +// This is passed by void pointer so it can be any data type +// that can be passed using a single void pointer (LPVOID). +struct seqSumQualData { + vector* position; + vector* averageQ; + vector< vector >* scores; + string filename, namefile; + unsigned long long start; + unsigned long long end; + int count; + MothurOut* m; + map nameMap; + + ~seqSumQualData(){} + seqSumQualData(vector* p, vector* a, vector< vector >* s, string f, MothurOut* mout, unsigned long long st, unsigned long long en, string n, map nam) { + position = p; + averageQ = a; + scores = s; + filename = f; + m = mout; + start = st; + end = en; + namefile = n; + nameMap = nam; + count = 0; + } +}; + +/**************************************************************************************************/ +#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) +#else +static DWORD WINAPI MySeqSumQualThreadFunction(LPVOID lpParam){ + seqSumQualData* pDataArray; + pDataArray = (seqSumQualData*)lpParam; + + try { + ifstream in; + pDataArray->m->openInputFile(pDataArray->filename, in); + + //print header if you are process 0 + if ((pDataArray->start == 0) || (pDataArray->start == 1)) { + in.seekg(0); + }else { //this accounts for the difference in line endings. + in.seekg(pDataArray->start-1); pDataArray->m->gobble(in); + } + + int count = 0; + for(int i = 0; i < pDataArray->end; i++){ //end is the number of sequences to process + + if (pDataArray->m->control_pressed) { in.close(); pDataArray->count = 1; return 1; } + + QualityScores current(in); pDataArray->m->gobble(in); + + if (current.getName() != "") { + + int num = 1; + if (pDataArray->namefile != "") { + //make sure this sequence is in the namefile, else error + map::iterator it = pDataArray->nameMap.find(current.getName()); + + if (it == pDataArray->nameMap.end()) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " is not in your namefile, please correct."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + else { num = it->second; } + } + + vector thisScores = current.getQualityScores(); + + //resize to num of positions setting number of seqs with that size to 1 + if (pDataArray->position->size() < thisScores.size()) { pDataArray->position->resize(thisScores.size(), 0); } + if (pDataArray->averageQ->size() < thisScores.size()) { pDataArray->averageQ->resize(thisScores.size(), 0); } + if (pDataArray->scores->size() < thisScores.size()) { + pDataArray->scores->resize(thisScores.size()); + for (int i = 0; i < pDataArray->scores->size(); i++) { pDataArray->scores->at(i).resize(41, 0); } + } + + //increase counts of number of seqs with this position + //average is really the total, we will average in execute + for (int i = 0; i < thisScores.size(); i++) { + pDataArray->position->at(i) += num; + pDataArray->averageQ->at(i) += (thisScores[i] * num); //weighting for namesfile + if (thisScores[i] > 40) { pDataArray->m->mothurOut("[ERROR]: " + current.getName() + " has a quality scores of " + toString(thisScores[i]) + ", expecting values to be less than 40."); pDataArray->m->mothurOutEndLine(); pDataArray->m->control_pressed = true; } + else { pDataArray->scores->at(i)[thisScores[i]] += num; } + } + + count += num; + } + } + + pDataArray->count = count; + in.close(); + + return 0; + + } + catch(exception& e) { + pDataArray->m->errorOut(e, "SummaryQualCommand", "MySeqSumQualThreadFunction"); + exit(1); + } +} +#endif + + +/**************************************************************************************************/ + + +#endif + diff --git a/venn.cpp b/venn.cpp index cd14785..32ebb3c 100644 --- a/venn.cpp +++ b/venn.cpp @@ -17,7 +17,7 @@ //********************************************************************************************************************** -Venn::Venn(string o, bool n, string f) : outputDir(o), nseqs(n), inputfile(f) { +Venn::Venn(string o, bool n, string f, int fs) : outputDir(o), nseqs(n), inputfile(f), fontSize(fs) { try { m = MothurOut::getInstance(); } @@ -49,17 +49,17 @@ vector Venn::getPic(SAbundVector* sabund, vector vCalcs) { outsvg << "\n"; outsvg << ""; - outsvg << "Venn Diagram at distance " + sabund->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + sabund->getLabel() + "\n"; outsvg << ""; - outsvg << "" + toString(data[0]) + "\n"; + outsvg << "" + toString(data[0]) + "\n"; if (data.size() == 3) { - outsvg << "The lower bound of the confidence interval is " + toString(data[1]) + "\n"; - outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; + outsvg << "The lower bound of the confidence interval is " + toString(data[1]) + "\n"; + outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; } if (nseqs) { - outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; + outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; } outsvg << "\n\n"; @@ -113,18 +113,18 @@ vector Venn::getPic(vector lookup, vector\n"; outsvg << ""; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.24 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(data[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.24 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(data[0]) + "\n"; if (data.size() == 3) { - outsvg << "The lower bound of the confidence interval is " + toString(data[1]) + "\n"; - outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; + outsvg << "The lower bound of the confidence interval is " + toString(data[1]) + "\n"; + outsvg << "The upper bound of the confidence interval is " + toString(data[2]) + "\n"; } if (nseqs) { - outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; + outsvg << "The number of sequences represented is " + toString(sabund->getNumSeqs()) + "\n"; } outsvg << "\n\n"; @@ -193,35 +193,35 @@ vector Venn::getPic(vector lookup, vector"; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; - outsvg << "" + toString(numA[0] - shared[0]) + "\n"; - outsvg << "" + toString(numB[0] - shared[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(shared[0]) + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + outsvg << "" + toString(numA[0] - shared[0]) + "\n"; + outsvg << "" + toString(numB[0] - shared[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.25 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(shared[0]) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA) + "; " + toString(uniqSeqsToA) + " sequences are not shared"; } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB) + "; " + toString(uniqSeqsToB) + " sequences are not shared"; } outsvg << "\n"; - outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]); + outsvg << "The number of sepecies shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(shared[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal) + "; " + toString((sharedVal / (float)(numSeqsA + numSeqsB))*100) + "% of these sequences are shared"; } outsvg << "\n"; - outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))*100) + "\n"; + outsvg << "Percentage of species that are shared in groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString((shared[0] / (float)(numA[0] + numB[0] - shared[0]))*100) + "\n"; - outsvg << "The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0]))+ "\n";; + outsvg << "The total richness for all groups is " + toString((float)(numA[0] + numB[0] - shared[0]))+ "\n";; //close file @@ -380,58 +380,58 @@ vector Venn::getPic(vector lookup, vector"; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; outsvg << ""; //place labels within overlaps - outsvg << "" + toString(numA[0]-sharedAwithBC[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(sharedAB[0] - sharedABC) + "\n"; - outsvg << "" + toString(numB[0]-sharedBwithAC[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(sharedAC[0] - sharedABC) + "\n"; - outsvg << "" + toString(numC[0]-sharedCwithAB[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.52 * height)) + "\">" + lookup[2]->getGroup() + "\n"; - outsvg << "" + toString(sharedBC[0] - sharedABC) + "\n"; - outsvg << "" + toString(sharedABC) + "\n"; + outsvg << "" + toString(numA[0]-sharedAwithBC[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(sharedAB[0] - sharedABC) + "\n"; + outsvg << "" + toString(numB[0]-sharedBwithAC[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(sharedAC[0] - sharedABC) + "\n"; + outsvg << "" + toString(numC[0]-sharedCwithAB[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.52 * height)) + "\">" + lookup[2]->getGroup() + "\n"; + outsvg << "" + toString(sharedBC[0] - sharedABC) + "\n"; + outsvg << "" + toString(sharedABC) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]); + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and combined groups " + lookup[1]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedAwithBC[0]) + "\n"; + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[2]->getGroup() + " is " + toString(sharedBwithAC[0]) + "\n"; + outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and combined groups " + lookup[0]->getGroup() + lookup[1]->getGroup() + " is " + toString(sharedCwithAB[0]) + "\n"; + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA) + "; " + toString(uniqSeqsToA) + " sequences are not shared"; } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB) + "; " + toString(uniqSeqsToB) + " sequences are not shared"; } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); if (numC.size() == 3) { outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC) + "; " + toString(uniqSeqsToC) + " sequences are not shared"; } outsvg << "\n"; - outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "\n"; - outsvg << "The total shared richness is " + toString(sharedABC); + outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedAB[0] - sharedAC[0] - sharedBC[0] + sharedABC) + "\n"; + outsvg << "The total shared richness is " + toString(sharedABC); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal); } outsvg << "\n"; @@ -477,56 +477,56 @@ vector Venn::getPic(vector lookup, vector"; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; outsvg << ""; outsvg << ""; outsvg << ""; //place labels within overlaps - outsvg << "" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(sharedab[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(sharedac[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.51 * height)) + "\">" + lookup[2]->getGroup() + "\n"; - outsvg << "" + toString(sharedbc[0] - sharedabc[0]) + "\n"; - outsvg << "" + toString(sharedabc[0]) + "\n"; + outsvg << "" + toString(numA[0]-sharedab[0]-sharedac[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(sharedab[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(numB[0]-sharedab[0]-sharedbc[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.19 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(sharedac[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(numC[0]-sharedac[0]-sharedbc[0]+sharedabc[0]) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.51 * height)) + "\">" + lookup[2]->getGroup() + "\n"; + outsvg << "" + toString(sharedbc[0] - sharedabc[0]) + "\n"; + outsvg << "" + toString(sharedabc[0]) + "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedab[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedac[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]); + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedbc[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA[0]); if (numA.size() == 3) { outsvg << " the lci is " + toString(numA[1]) + " and the hci is " + toString(numA[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB[0]); if (numB.size() == 3) { outsvg << " the lci is " + toString(numB[1]) + " and the hci is " + toString(numB[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC[0]); if (numC.size() == 3) { outsvg << " the lci is " + toString(numC[1]) + " and the hci is " + toString(numC[2]); } if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC); } outsvg << "\n"; - outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "\n"; - outsvg << "The total shared richness is " + toString(sharedabc[0]); + outsvg << "The total richness of all the groups is " + toString(numA[0] + numB[0] + numC[0] - sharedab[0] - sharedac[0] - sharedbc[0] + sharedabc[0]) + "\n"; + outsvg << "The total shared richness is " + toString(sharedabc[0]); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedVal); } outsvg << "\n"; @@ -717,50 +717,50 @@ vector Venn::getPic(vector lookup, vector\n"; outsvg << "\n"; outsvg << ""; - outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; + outsvg << "Venn Diagram at distance " + lookup[0]->getLabel() + "\n"; - outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA); + outsvg << "The number of species in group " + lookup[0]->getGroup() + " is " + toString(numA); if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsA); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB); + outsvg << "The number of species in group " + lookup[1]->getGroup() + " is " + toString(numB); if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsB); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC); + outsvg << "The number of species in group " + lookup[2]->getGroup() + " is " + toString(numC); if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsC); } outsvg << "\n"; - outsvg << "The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD); + outsvg << "The number of species in group " + lookup[3]->getGroup() + " is " + toString(numD); if (nseqs) { outsvg << ", and the number of squences is " + toString(numSeqsD); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[1]->getGroup() + " is " + toString(sharedAB); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedAC); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedAD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedADVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC); + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedBC); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD); + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBDVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD); + outsvg << "The number of species shared between groups " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedCD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedCDVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[2]->getGroup() + " is " + toString(sharedABC); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABCVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[1]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedABD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedABDVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD); + outsvg << "The number of species shared between groups " + lookup[0]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedACD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedACDVal); } outsvg << "\n"; - outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD); + outsvg << "The number of species shared between groups " + lookup[1]->getGroup() + ", " + lookup[2]->getGroup() + " and " + lookup[3]->getGroup() + " is " + toString(sharedBCD); if (nseqs) { outsvg << ", and the number of squences is " + toString(sharedBCDVal); } outsvg << "\n"; @@ -799,27 +799,27 @@ vector Venn::getPic(vector lookup, vector" + toString(numA) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.11 * height)) + "\">" + lookup[0]->getGroup() + "\n"; - outsvg << "" + toString(sharedAB) + "\n"; - outsvg << "" + toString(numB) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.11 * height)) + "\">" + lookup[1]->getGroup() + "\n"; - outsvg << "" + toString(sharedAC) + "\n"; - outsvg << "" + toString(numC) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.26 * height)) + "\">" + lookup[2]->getGroup() + "\n"; - outsvg << "" + toString(sharedBD) + "\n"; - outsvg << "" + toString(numD) + "\n"; - outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.26 * height)) + "\">" + lookup[3]->getGroup() + "\n"; - outsvg << "" + toString(sharedAD) + "\n"; - outsvg << "" + toString(sharedBC) + "\n"; - outsvg << "" + toString(sharedCD) + "\n"; - outsvg << "" + toString(sharedABD) + "\n"; - outsvg << "" + toString(sharedBCD) + "\n"; - outsvg << "" + toString(sharedACD) + "\n"; - outsvg << "" + toString(sharedABC) + "\n"; - outsvg << "" + toString(sharedABCD) + "\n"; + outsvg << "" + toString(numA) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.11 * height)) + "\">" + lookup[0]->getGroup() + "\n"; + outsvg << "" + toString(sharedAB) + "\n"; + outsvg << "" + toString(numB) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.11 * height)) + "\">" + lookup[1]->getGroup() + "\n"; + outsvg << "" + toString(sharedAC) + "\n"; + outsvg << "" + toString(numC) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.26 * height)) + "\">" + lookup[2]->getGroup() + "\n"; + outsvg << "" + toString(sharedBD) + "\n"; + outsvg << "" + toString(numD) + "\n"; + outsvg << "getGroup().length() / 2)) + "\" y=\"" + toString(int(0.26 * height)) + "\">" + lookup[3]->getGroup() + "\n"; + outsvg << "" + toString(sharedAD) + "\n"; + outsvg << "" + toString(sharedBC) + "\n"; + outsvg << "" + toString(sharedCD) + "\n"; + outsvg << "" + toString(sharedABD) + "\n"; + outsvg << "" + toString(sharedBCD) + "\n"; + outsvg << "" + toString(sharedACD) + "\n"; + outsvg << "" + toString(sharedABC) + "\n"; + outsvg << "" + toString(sharedABCD) + "\n"; - outsvg << "The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)); + outsvg << "The total richness of all the groups is " + toString((float)(numA + numB + numC + numD + sharedAB + sharedAC + sharedAD + sharedBC + sharedBD + sharedCD + sharedABC + sharedABD + sharedACD + sharedBCD + sharedABCD)); if (nseqs) { outsvg << ", and the number of squences in the otus shared by all groups is " + toString(sharedVal); } outsvg << "\n"; diff --git a/venn.h b/venn.h index c5c5f6f..50d49d1 100644 --- a/venn.h +++ b/venn.h @@ -19,7 +19,7 @@ class Venn { public: - Venn(string, bool, string); + Venn(string, bool, string, int); ~Venn(){}; vector getPic(SAbundVector*, vector); @@ -31,6 +31,7 @@ private: ofstream outsvg; MothurOut* m; bool nseqs; + int fontSize; }; /***********************************************************************/ diff --git a/venncommand.cpp b/venncommand.cpp index 5a5b9b3..b5b401d 100644 --- a/venncommand.cpp +++ b/venncommand.cpp @@ -27,6 +27,7 @@ vector VennCommand::setParameters(){ CommandParameter pcalc("calc", "String", "", "", "", "", "",false,false); parameters.push_back(pcalc); CommandParameter pabund("abund", "Number", "", "10", "", "", "",false,false); parameters.push_back(pabund); CommandParameter pnseqs("nseqs", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(pnseqs); + CommandParameter pfontsize("fontsize", "Number", "", "24", "", "", "",false,false); parameters.push_back(pfontsize); CommandParameter ppermute("permute", "Boolean", "", "F", "", "", "",false,false); parameters.push_back(ppermute); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "",false,false); parameters.push_back(pinputdir); CommandParameter poutputdir("outputdir", "String", "", "", "", "", "",false,false); parameters.push_back(poutputdir); @@ -44,9 +45,10 @@ vector VennCommand::setParameters(){ string VennCommand::getHelpString(){ try { string helpString = ""; - helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n"; + helpString += "The venn command parameters are list, shared, groups, calc, abund, nseqs, permute, fontsize and label. shared, relabund, list, rabund or sabund is required unless you have a valid current file.\n"; helpString += "The groups parameter allows you to specify which of the groups in your groupfile you would like included in your venn diagram, you may only use a maximum of 4 groups.\n"; helpString += "The group names are separated by dashes. The label allows you to select what distance levels you would like a venn diagram created for, and are also separated by dashes.\n"; + helpString += "The fontsize parameter allows you to adjust the font size of the picture created, default=24.\n"; helpString += "The venn command should be in the following format: venn(groups=yourGroups, calc=yourCalcs, label=yourLabels, abund=yourAbund).\n"; helpString += "Example venn(groups=A-B-C, calc=sharedsobs-sharedchao, abund=20).\n"; helpString += "The default value for groups is all the groups in your groupfile up to 4, and all labels in your inputfile will be used.\n"; @@ -196,8 +198,11 @@ VennCommand::VennCommand(string option) { temp = validParameter.validFile(parameters, "nseqs", false); if (temp == "not found"){ temp = "f"; } nseqs = m->isTrue(temp); - temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; } + temp = validParameter.validFile(parameters, "permute", false); if (temp == "not found"){ temp = "f"; } perm = m->isTrue(temp); + + temp = validParameter.validFile(parameters, "fontsize", false); if (temp == "not found") { temp = "24"; } + convert(temp, fontsize); } @@ -247,7 +252,7 @@ int VennCommand::execute(){ //if the users entered no valid calculators don't execute command if (vennCalculators.size() == 0) { m->mothurOut("No valid calculators given, please correct."); m->mothurOutEndLine(); return 0; } - venn = new Venn(outputDir, nseqs, inputfile); + venn = new Venn(outputDir, nseqs, inputfile, fontsize); input = new InputData(inputfile, format); string lastLabel; diff --git a/venncommand.h b/venncommand.h index 8d14262..ae3c121 100644 --- a/venncommand.h +++ b/venncommand.h @@ -40,7 +40,7 @@ private: vector lookup; set< set > combosOfFour; SAbundVector* sabund; - int abund; + int abund, fontsize; bool abort, allLines, nseqs, perm; set labels; //holds labels to be used