From a65438a717aba8cad1ff86c53279b191710b8810 Mon Sep 17 00:00:00 2001 From: Sarah Westcott Date: Fri, 28 Feb 2014 14:59:14 -0500 Subject: [PATCH] pcr.seqs bug fix. working on shannon range calc --- heatmap.cpp | 15 ++++++++++--- prcseqscommand.cpp | 4 ++-- shannonrange.cpp | 34 +++++++++++++++++++++++------ shannonrange.h | 4 ++-- sracommand.cpp | 53 ++++++++++++++++++++++++++++++++++------------ sracommand.h | 2 +- 6 files changed, 85 insertions(+), 27 deletions(-) diff --git a/heatmap.cpp b/heatmap.cpp index 514c7af..254b706 100644 --- a/heatmap.cpp +++ b/heatmap.cpp @@ -144,12 +144,17 @@ string HeatMap::getPic(vector lookup) { if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0. if (scaler == "log10") { - scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000"; + if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; } + if (relAbund == 1) { relAbund -= 0.001; } + scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000"; }else if (scaler == "log2") { + if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; } + if (relAbund == 1) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000"; }else if (scaler == "linear") { - scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; + scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; }else { //if user enters invalid scaler option. + if (maxRelAbund[i] == 1) { maxRelAbund[i] += 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i])))) + "0000"; } }else { scaleRelAbund[i][j] = "FFFFFF"; } @@ -455,11 +460,15 @@ string HeatMap::getPic(vector lookup) { if (lookup[i]->getAbundance(j) != 0) { //don't want log value of 0. if (scaler == "log10") { + if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; } + if (relAbund == 1) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund) / log10(maxRelAbund[i]))) + "0000"; }else if (scaler == "log2") { + if (maxRelAbund[i] == 1) { maxRelAbund[i] -= 0.001; } + if (relAbund == 1) { relAbund -= 0.001; } scaleRelAbund[i][j] = toHex(int(255 * log2(relAbund) / log2(maxRelAbund[i]))) + "0000"; }else if (scaler == "linear") { - scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; + scaleRelAbund[i][j] = toHex(int(255 * relAbund / maxRelAbund[i])) + "0000"; }else { //if user enters invalid scaler option. scaleRelAbund[i][j] = toHex(int(255 * log10(relAbund / log10(maxRelAbund[i])))) + "0000"; } diff --git a/prcseqscommand.cpp b/prcseqscommand.cpp index a1eae46..6a4df63 100644 --- a/prcseqscommand.cpp +++ b/prcseqscommand.cpp @@ -575,7 +575,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string - if (fileAligned) { + if (fileAligned && adjustNeeded) { //find pend - pend is the biggest ending value, but we must account for when we adjust the start. That adjustment may make the "new" end larger then the largest end. So lets find out what that "new" end will be. ifstream inLocations; m->openInputFile(locationsFile, inLocations); @@ -608,7 +608,7 @@ int PcrSeqsCommand::createProcesses(string filename, string goodFileName, string inLocations.close(); adjustDots(goodFileName, locationsFile, pstart, pend); - } + }else { m->mothurRemove(locationsFile); } return num; diff --git a/shannonrange.cpp b/shannonrange.cpp index f26f08a..2b8468b 100644 --- a/shannonrange.cpp +++ b/shannonrange.cpp @@ -10,18 +10,40 @@ /***********************************************************************/ -EstOutput RangeShannon::getValues(vector shared) { +EstOutput RangeShannon::getValues(SAbundVector* rank){ try { data.resize(3,0); double commSize = 1e20; + double sampleSize = rank->getNumSeqs(); - SAbundVector sabund1 = shared[0]->getSAbundVector(); - SAbundVector sabund2 = shared[1]->getSAbundVector(); + double aux = ceil(pow(sampleSize+1, (1/(double)3))); + double est0 = rank->get(1)+1; + if (aux > est0) { est0 = aux; } //est0 = max(rank->get(1)+1, aux) + + vector ests; + double numr = 0.0; + for (int i = 1; i < rank->getNumBins()-1; i++) { + + if (m->control_pressed) { break; } + + int abund = rank->get(i); + + if (abund != 0) { + + int abundNext = rank->get(i+1); + if (abundNext == 0) { numr = aux; } + else { + if (abundNext+1 > aux) { numr = abundNext+1; } //numr = max(abundNext+1, aux) + else { numr = aux; } + } + double denr = aux; + if (abund > aux) { denr = abund; } //denr = max(abund, aux) + ests.push_back((abund+1)*numr/denr); + } + } + numr = aux; - double sampleSize = 0; - for (int i = 0; i < sabund1.getNumBins(); i++) { sampleSize += (sabund1.get(i) * sabund2.get(i)); } - int aux = ceil(pow((sampleSize+1), 0.33333)); if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } diff --git a/shannonrange.h b/shannonrange.h index 34b73f4..2123e05 100644 --- a/shannonrange.h +++ b/shannonrange.h @@ -17,8 +17,8 @@ class RangeShannon : public Calculator { public: RangeShannon() : Calculator("rangeshannon", 3, false) {}; - EstOutput getValues(SAbundVector*) {return data;}; - EstOutput getValues(vector); + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; string getCitation() { return "http://www.mothur.org/wiki/rangeshannon"; } }; diff --git a/sracommand.cpp b/sracommand.cpp index 65ccda4..2c4c2b1 100644 --- a/sracommand.cpp +++ b/sracommand.cpp @@ -13,11 +13,11 @@ //********************************************************************************************************************** vector SRACommand::setParameters(){ try { - CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(psff); - CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(pgroup); - CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","sra",false,false); parameters.push_back(poligos); - CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfile); - CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","sra",false,false); parameters.push_back(pfastq); + CommandParameter psff("sff", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(psff); + CommandParameter pgroup("group", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(pgroup); + CommandParameter poligos("oligos", "InputTypes", "", "", "groupOligos", "none", "none","",false,false); parameters.push_back(poligos); + CommandParameter pfile("file", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfile); + CommandParameter pfastq("fastq", "InputTypes", "", "", "sffFastQFile", "sffFastQFile", "none","xml",false,false); parameters.push_back(pfastq); //choose only one multiple options CommandParameter pplatform("platform", "Multiple", "454-???-???", "454", "", "", "","",false,false); parameters.push_back(pplatform); CommandParameter ppdiffs("pdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ppdiffs); @@ -43,9 +43,13 @@ vector SRACommand::setParameters(){ string SRACommand::getHelpString(){ try { string helpString = ""; - helpString += "The sra command creates a sequence read archive from sff or fastq files.\n"; - helpString += "The sra command parameters are: sff, fastqfiles, oligos, platform....\n"; - helpString += "The sffiles parameter is used to provide a file containing a \n"; + helpString += "The sra command creates the necessary files for a NCBI submission. The xml file and individual sff or fastq files parsed from the original sff or fastq file.\n"; + helpString += "The sra command parameters are: sff, fastq, file, oligos, pdiffs, bdiffs, ldiffs, sdiffs, tdiffs, group.\n"; + helpString += "The sff parameter is used to provide the original sff file.\n"; + helpString += "The fastq parameter is used to provide the original fastq file.\n"; + helpString += "The oligos parameter is used to provide an oligos file to parse your sff or fastq file by.\n"; + helpString += "The group parameter is used to provide the group file to parse your sff or fastq file by.\n"; + helpString += "The file parameter is used to provide a file containing a list of individual fastq or sff files.\n"; helpString += "The tdiffs parameter is used to specify the total number of differences allowed in the sequence. The default is pdiffs + bdiffs + sdiffs + ldiffs.\n"; helpString += "The bdiffs parameter is used to specify the number of differences allowed in the barcode. The default is 0.\n"; helpString += "The pdiffs parameter is used to specify the number of differences allowed in the primer. The default is 0.\n"; @@ -66,7 +70,7 @@ string SRACommand::getOutputPattern(string type) { try { string pattern = ""; - if (type == "sra") { pattern = "[filename],sra"; } + if (type == "xml") { pattern = "[filename],xml"; } else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; } return pattern; @@ -82,7 +86,7 @@ SRACommand::SRACommand(){ abort = true; calledHelp = true; setParameters(); vector tempOutNames; - outputTypes["sra"] = tempOutNames; + outputTypes["xml"] = tempOutNames; } catch(exception& e) { m->errorOut(e, "SRACommand", "SRACommand"); @@ -112,6 +116,8 @@ SRACommand::SRACommand(string option) { if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; } } + vector tempOutNames; + outputTypes["xml"] = tempOutNames; //if the user changes the input directory command factory will send this info to us in the output parameter string inputDir = validParameter.validFile(parameters, "inputdir", false); @@ -234,8 +240,6 @@ SRACommand::SRACommand(string option) { m->mothurConvert(temp, tdiffs); if(tdiffs == 0){ tdiffs = bdiffs + pdiffs + ldiffs + sdiffs; } - - } @@ -253,12 +257,13 @@ int SRACommand::execute(){ //parse files vector filesBySample; + isSFF = false; if (file != "") { readFile(filesBySample); } else if (sfffile != "") { parseSffFile(filesBySample); } else if (fastqfile != "") { parseFastqFile(filesBySample); } - + //create xml file //output files created by command @@ -277,6 +282,27 @@ int SRACommand::execute(){ //********************************************************************************************************************** int SRACommand::readFile(vector& files){ try { + files.clear(); + + ifstream in; + m->openInputFile(file, in); + + while(!in.eof()) { + + if (m->control_pressed) { break; } + + string filename; + in >> filename; m->gobble(in); + files.push_back(filename); + } + in.close(); + + if (!m->control_pressed) { + if (files.size() > 0) { + int pos = files[0].find(".sff"); + if (pos != string::npos) { isSFF = true; } //these files are sff files + } + } return 0; } @@ -288,6 +314,7 @@ int SRACommand::readFile(vector& files){ //********************************************************************************************************************** int SRACommand::parseSffFile(vector& files){ try { + isSFF = true; //run sffinfo to parse sff file into individual sampled sff files string commandString = "sff=" + sfffile; if (groupfile != "") { commandString += ", group=" + groupfile; } diff --git a/sracommand.h b/sracommand.h index 7bf764b..3f7f156 100644 --- a/sracommand.h +++ b/sracommand.h @@ -34,7 +34,7 @@ public: void help() { m->mothurOut(getHelpString()); } private: - bool abort; + bool abort, isSFF; int tdiffs, bdiffs, pdiffs, sdiffs, ldiffs; string sfffile, fastqfile, platform, outputDir, groupfile, file, oligosfile; vector outputNames; -- 2.39.2