From: ryabin Date: Mon, 4 May 2009 22:20:46 +0000 (+0000) Subject: *** empty log message *** X-Git-Url: https://git.donarmstrong.com/?p=mothur.git;a=commitdiff_plain;h=c196b6b4768ccb84955d773ff0f22e4994d1ba7b *** empty log message *** --- diff --git a/bergerparker.cpp b/bergerparker.cpp index c9f3565..b900ce1 100644 --- a/bergerparker.cpp +++ b/bergerparker.cpp @@ -23,11 +23,11 @@ EstOutput BergerParker::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the BergerParker class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the BergerParker class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/bstick.cpp b/bstick.cpp index eabf087..c3ff58f 100644 --- a/bstick.cpp +++ b/bstick.cpp @@ -25,8 +25,7 @@ RAbundVector BStick::getRAbundVector(SAbundVector* rank){ int nb = 0; int ns = 0; - for(int i = rank->size()-1; i > 0; i--) - { + for(int i = rank->size()-1; i > 0; i--) { int cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; @@ -55,8 +54,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ double sumObs = 0; double maxDiff = 0; - for(int i = 0; i < rdata.size(); i++) - { + for(int i = 0; i < rdata.size(); i++) { sumObs += rdata.get(i); sumExp += numInd/numSpec*invSum(i+1,numSpec); double diff = fabs(sumObs-sumExp); @@ -64,6 +62,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ maxDiff = diff; } + data[0] = maxDiff/numInd; data[1] = 0.886/sqrt(rdata.size()); data[2] = 1.031/sqrt(rdata.size()); @@ -71,6 +70,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ /*cout << critVal << "\n"; cout << "If D-Statistic is less than the critical value then the data fits the Broken Stick model w/ 95% confidence.\n\n";*/ + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } @@ -78,11 +78,11 @@ EstOutput BStick::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the BStick class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the BStick class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/calculator.cpp b/calculator.cpp index 71aa99c..c45ddbf 100644 --- a/calculator.cpp +++ b/calculator.cpp @@ -863,7 +863,7 @@ double TDTable::getConfLimit(int row, int col) //Rows are the degrees of freedom //Found on http://www.vgtu.lt/leidiniai/elektroniniai/Probability.pdf/Table%203.pdf //Confidence Level .90 .95 .975 .99 .995 .999 .9995 - double values[33][7] = {{3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578}, + double values[30][7] = {{3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578}, {1.886, 2.920, 4.303, 6.965, 9.925, 22.328, 31.600}, {1.638, 2.353, 3.182, 4.541, 5.841, 10.214, 12.924}, {1.533, 2.132, 2.776, 3.747, 4.604, 7.173, 8.610}, @@ -892,10 +892,8 @@ double TDTable::getConfLimit(int row, int col) //Rows are the degrees of freedom {1.314, 1.703, 2.052, 2.473, 2.771, 3.421, 3.689}, {1.313, 1.701, 2.048, 2.467, 2.763, 3.408, 3.674}, {1.311, 1.699, 2.045, 2.462, 2.756, 3.396, 3.660}, - {1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646}, - {1.296, 1.671, 2.000, 2.390, 2.660, 3.232, 3.460}, - {1.289, 1.658, 1.980, 2.358, 2.617, 3.160, 3.373}, - {1.282, 1.645, 1.960, 2.326, 2.576, 3.091, 3.291}}; + {1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646}}; + return values[row][col]; } catch(exception& e) { diff --git a/collect.cpp b/collect.cpp index d109ca0..ee46ef5 100644 --- a/collect.cpp +++ b/collect.cpp @@ -111,7 +111,9 @@ try { //calculate at 0 and the given increment if((i == 0) || (i+1) % increment == 0){ + //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. + int n = 1; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { diff --git a/collectcommand.cpp b/collectcommand.cpp index 2687f92..098ec1f 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -22,8 +22,12 @@ #include "logsd.h" #include "bergerparker.h" #include "bstick.h" +#include "goodscoverage.h" + + #include "coverage.h" + //********************************************************************************************************************** CollectCommand::CollectCommand(){ try { @@ -64,7 +68,9 @@ CollectCommand::CollectCommand(){ }else if (globaldata->Estimators[i] == "bergerparker") { cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker"))); }else if (globaldata->Estimators[i] == "bstick") { - cDisplays.push_back(new CollectDisplay(new BStick(), new OneColumnFile(fileNameRoot+"bstick"))); + cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick"))); + }else if (globaldata->Estimators[i] == "goodscoverage") { + cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage"))); } } } diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index bc0609e..96e495a 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -29,6 +29,8 @@ #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "sharedwhittaker.h" @@ -74,6 +76,7 @@ CollectSharedCommand::CollectSharedCommand(){ cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker"))); }else if (globaldata->Estimators[i] == "sharednseqs") { cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs"))); + }else if (globaldata->Estimators[i] == "ochiai") { cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai"))); }else if (globaldata->Estimators[i] == "anderberg") { diff --git a/commandfactory.cpp b/commandfactory.cpp index 1f6f828..e357e6c 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -11,6 +11,7 @@ #include "readdistcommand.h" #include "readtreecommand.h" #include "readotucommand.h" +#include "readseqscommand.h" #include "clustercommand.h" #include "parselistcommand.h" #include "collectcommand.h" @@ -31,6 +32,8 @@ #include "unifracweightedcommand.h" #include "libshuffcommand.h" #include "heatmapcommand.h" +#include "filterseqscommand.h" +#include "mothur.h" #include "venncommand.h" #include "nocommands.h" #include "binsequencecommand.h" @@ -63,6 +66,7 @@ Command* CommandFactory::getCommand(string commandName){ if(commandName == "read.dist") { command = new ReadDistCommand(); } else if(commandName == "read.otu") { command = new ReadOtuCommand(); } + else if(commandName == "read.seqs") { command = new ReadSeqsCommand(); } else if(commandName == "read.tree") { command = new ReadTreeCommand(); } else if(commandName == "cluster") { command = new ClusterCommand(); } else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); } @@ -82,6 +86,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.line") { command = new GetlineCommand(); } else if(commandName == "libshuff") { command = new LibShuffCommand(); } else if(commandName == "heatmap") { command = new HeatMapCommand(); } + else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); } else if(commandName == "venn") { command = new VennCommand(); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } diff --git a/errorchecking.cpp b/errorchecking.cpp index 728d4d2..eaf49df 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -23,6 +23,7 @@ ErrorCheck::ErrorCheck() { /******************************************************/ void ErrorCheck::refresh() { + //columnfile = globaldata->getColumnFile(); //phylipfile = globaldata->getPhylipFile(); //listfile = globaldata->getListFile(); @@ -38,6 +39,7 @@ void ErrorCheck::refresh() { //method = globaldata->getMethod(); //randomtree = globaldata->getRandomTree(); //sharedfile = globaldata->getSharedFile(); + } /*******************************************************/ @@ -74,7 +76,6 @@ bool ErrorCheck::checkInput(string input) { //is it a valid command if (validCommand->isValidCommand(commandName) != true) { return false; } - string parameter, value; //reads in parameters and values @@ -94,9 +95,11 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "name" ) { namefile = value; } if (parameter == "order" ) { orderfile = value; } if (parameter == "fasta" ) { fastafile = value; } + if (parameter == "nexus" ) { nexusfile = value; } + if (parameter == "clustal" ) { clustalfile = value; } if (parameter == "tree" ) { treefile = value; } - if (parameter == "group" ) { groupfile = value; } - if (parameter == "shared" ) { sharedfile = value; } + if (parameter == "group" ) { groupfile = value; } + if (parameter == "shared" ) { sharedfile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } if (parameter == "iters" ) { iters = value; } @@ -107,10 +110,13 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "line" ) { line = value; } if (parameter == "label" ) { label = value; } if (parameter == "abund" ) { abund = value; } - if (parameter == "random" ) { randomtree = value; } - if (parameter == "sorted" ) { sorted = value; } + if (parameter == "random" ) { randomtree = value; } + if (parameter == "sorted" ) { sorted = value; } + if (parameter == "trump" ) { trump = value; } + if (parameter == "soft" ) { soft = value; } + if (parameter == "filter" ) { filter = value; } if (parameter == "scale" ) { scale = value; } - + } //gets the last parameter and value @@ -131,6 +137,8 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "group" ) { groupfile = value; } if (parameter == "shared" ) { sharedfile = value; } if (parameter == "fasta" ) { fastafile = value; } + if (parameter == "nexus" ) { nexusfile = value; } + if (parameter == "clustal" ) { clustalfile = value; } if (parameter == "tree" ) { treefile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } @@ -144,7 +152,11 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "random" ) { randomtree = value; } if (parameter == "abund" ) { abund = value; } if (parameter == "sorted" ) { sorted = value; } + if (parameter == "trump" ) { trump = value; } + if (parameter == "soft" ) { soft = value; } + if (parameter == "filter" ) { filter = value; } if (parameter == "scale" ) { scale = value; } + } } @@ -172,6 +184,8 @@ bool ErrorCheck::checkInput(string input) { } }else if (commandName == "read.tree") { validateTreeFiles(); //checks the treefile and groupfile parameters + }else if (commandName == "read.seqs") { + if ((fastafile == "") && (nexusfile == "") && (clustalfile == "") && (phylipfile == "")) { cout << "You must enter a fastafile, nexusfile, or clustalfile with the read.seqs() command." << endl; return false; } }else if (commandName == "deconvolute") { if (fastafile == "") { cout << "You must enter a fastafile with the deconvolute() command." << endl; return false; } validateReadFiles(); @@ -230,6 +244,12 @@ bool ErrorCheck::checkInput(string input) { } } + if (commandName == "filter.seqs"){ + if ((globaldata->getFastaFile() == "") && (globaldata->getNexusFile() == "") && (globaldata->getClustalFile() == "") && (globaldata->getPhylipFile() == "")) { + cout << "You must read either a fasta, nexus, clustal, or phylip file before you can use the filter.seqs command." << endl; return false; + } + } + if ((commandName == "bin.seqs")) { if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; } validateBinFiles(); @@ -551,6 +571,9 @@ void ErrorCheck::clear() { groupfile = ""; orderfile = ""; sharedfile = ""; + fastafile = ""; + nexusfile = ""; + clustalfile = ""; line = ""; label = ""; method = "furthest"; diff --git a/errorchecking.h b/errorchecking.h index 05ce5cf..bbc3050 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -33,8 +33,9 @@ class ErrorCheck { void validateBinFiles(); void clear(); void refresh(); - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, cutoff, format; - string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, scale; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, cutoff, format; + string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale; + string commandName, optionText; bool errorFree; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp new file mode 100644 index 0000000..aff959a --- /dev/null +++ b/filterseqscommand.cpp @@ -0,0 +1,160 @@ +/* + * filterseqscommand.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/4/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "filterseqscommand.h" +#include +#include + +/**************************************************************************************/ +void FilterSeqsCommand::doTrump() { + //trump = globaldata->getTrump(); +// +// for(int i = 0; i < db->size(); i++) { +// Sequence cur = db->get(i); +// string curAligned = cur.getAligned(); +// +// for(int j = 0; j < curAligned.length-1; j++) { +// string curChar = curAligned.substr(j, j+1); +// +// if(curChar.compare(trump) == 0) +// columnsToRemove[j] = true; +// } +// } +} + +/**************************************************************************************/ +void FilterSeqsCommand::doSoft() { + //soft = atoi(globaldata->getSoft().c_str()); +// vector > columnSymbolSums; +// vector > columnSymbols; +// for(int i = 0; i < db->get(0).getLength(); i++) { +// vector symbols; +// vector sums; +// columnSymbols[i] = symbols; +// columnSymbolSums[i] = sums; +// } +// +// for(int i = 0; i < db->size(); i++) { +// Sequence cur = db->get(i); +// string curAligned = cur.getAligned(); +// +// for(int j = 0; j < curAligned.length-1; j++) { +// string curChar = curAligned.substr(j, j+1); +// vector curColumnSymbols = columnSymbols[j]; +// +// bool newSymbol = true; +// +// for(int k = 0; j < curColumnSymbols.size(); j++) +// if(curChar.compare(curColumnSymbols[k]) == 0) { +// newSymbol = false; +// columnSymbolSums[j][k]++; +// } +// +// if(newSymbol) { +// columnSymbols.push_back(curChar); +// columnSymbolSums[j].push_back(1); +// } +// } +// } +// +// for(int i = 0; i < columnSymbolSums.size(); i++) { +// int totalSum = 0; +// int max = 0; +// vector curColumn = columnSymbolSums[i]; +// +// for(int j = 0; j < curColumn.size(); j++) { +// int curSum = curColumn[j]; +// if(curSum > max) +// max = curSum; +// totalSum += curSum; +// } +// +// if((double)max/(double)totalSum * 100 < soft) +// columnsToRemove[i] = true; +// } +} +void FilterSeqsCommand::doFilter() {} +/**************************************************************************************/ +int FilterSeqsCommand::execute() { + try { + globaldata = GlobalData::getInstance(); + filename = globaldata->inputFileName; + + if(globaldata->getFastaFile().compare("") != 0) { + readFasta = new ReadFasta(filename); + readFasta->read(); + db = readFasta->getDB(); + } + + else if(globaldata->getNexusFile().compare("") != 0) { + readNexus = new ReadNexus(filename); + readNexus->read(); + db = readNexus->getDB(); + } + + else if(globaldata->getClustalFile().compare("") != 0) { + readClustal = new ReadClustal(filename); + readClustal->read(); + db = readClustal->getDB(); + } + + else if(globaldata->getPhylipFile().compare("") != 0) { + readPhylip = new ReadPhylip(filename); + readPhylip->read(); + db = readPhylip->getDB(); + } + + for(int i = 0; i < db->get(0).getLength(); i++) + columnsToRemove[i] = false; + + // Trump + if(globaldata->getTrump().compare("") != 0) { + + + } + + // Soft + if(globaldata->getSoft().compare("") != 0) {} + + + + + // Filter + //if(globaldata->getFilter().compare("") != 0) { +// +// filter = globaldata->getFilter(); +// ifstream filehandle; +// openInputFile(filter, filehandle); +// +// char c; +// int count = 0; +// while(!filehandle.eof()) { +// c = filehandle.get(); +// if(c == '0') +// columnsToRemove[count] = true; +// count++; +// } +// } + + + + + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************************/ diff --git a/filterseqscommand.h b/filterseqscommand.h new file mode 100644 index 0000000..fffa457 --- /dev/null +++ b/filterseqscommand.h @@ -0,0 +1,48 @@ +#ifndef FILTERSEQSCOMMAND_H +#define FILTERSEQSCOMMAND_H + +/* + * filterseqscommand.h + * Mothur + * + * Created by Thomas Ryabin on 5/4/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "command.hpp" +#include "mothur.h" +#include "globaldata.hpp" +#include "readfasta.h" +#include "readnexus.h" +#include "readclustal.h" +#include "readseqsphylip.h" + +using namespace std; + +class FilterSeqsCommand : public Command { + +public: + FilterSeqsCommand() {}; + ~FilterSeqsCommand() {}; + int execute(); + +private: + void doTrump(); + void doSoft(); + void doFilter(); + + GlobalData* globaldata; + string filename, trump, filter; + + ReadFasta* readFasta; + ReadNexus* readNexus; + ReadClustal* readClustal; + ReadPhylip* readPhylip; + + vector columnsToRemove; + SequenceDB* db; + double soft; +}; + +#endif \ No newline at end of file diff --git a/geom.cpp b/geom.cpp index 755c1ec..a23352c 100644 --- a/geom.cpp +++ b/geom.cpp @@ -21,8 +21,7 @@ RAbundVector Geom::getRAbundVector(SAbundVector* rank){ int nb = 0; int ns = 0; - for(int i = rank->size()-1; i > 0; i--) - { + for(int i = rank->size()-1; i > 0; i--) { int cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; @@ -51,8 +50,7 @@ EstOutput Geom::getValues(SAbundVector* rank){ double k = .5; double step = .49999; - while(fabs(min - numInd*kEq(k, (double)numSpec)) > .0001) //This uses a binary search to find the value of k. - { + while(fabs(min - numInd*kEq(k, (double)numSpec)) > .0001) { //This uses a binary search to find the value of k. if(numInd*kEq(k, numSpec) > min) k += step; else @@ -86,7 +84,7 @@ EstOutput Geom::getValues(SAbundVector* rank){ /*cout << critVal << "\n"; cout << "If D-Statistic is less than the critical value then the data fits the Geometric Series model w/ 95% confidence.\n\n";*/ - + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } @@ -94,11 +92,11 @@ EstOutput Geom::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the Geom class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the Geom class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/getlabelcommand.cpp b/getlabelcommand.cpp index 1e27267..6d92b8e 100644 --- a/getlabelcommand.cpp +++ b/getlabelcommand.cpp @@ -41,12 +41,10 @@ int GetlabelCommand::execute(){ string label; int numBins = 0; int count = -1; - while(in.good()) - { + while(in.good()) { if(count > numBins) count = 0; - if(count == 0) - { + if(count == 0) { cout << label << "\n"; in >> numBins; } diff --git a/getlinecommand.cpp b/getlinecommand.cpp index 09b1654..df14f51 100644 --- a/getlinecommand.cpp +++ b/getlinecommand.cpp @@ -41,12 +41,10 @@ int GetlineCommand::execute(){ int numBins = 0; int count = -1; int line = 1; - while(in.good()) - { + while(in.good()) { if(count > numBins) count = 0; - if(count == 0) - { + if(count == 0) { cout << line << "\n"; in >> numBins; line++; diff --git a/globaldata.cpp b/globaldata.cpp index ce10ee1..6849b88 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -54,7 +54,9 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "list" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "nexus" ) { nexusfile = value; inputFileName = value; fileroot = value; format = "nexus"; } + if (key == "clustal" ) { clustalfile = value; inputFileName = value; fileroot = value; format = "clustal"; } if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } if (key == "name" ) { namefile = value; } @@ -73,10 +75,16 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "step") { step = value; } if (key == "form") { form = value; } if (key == "sorted") { sorted = value; } + if (key == "vertical") { vertical = value; } + if (key == "trump") { trump = value; } + if (key == "filter") { filter = value; } + if (key == "soft") { soft = value; } if (key == "scale") { scale = value; } + + if (key == "line") {//stores lines to be used in a set lines.clear(); labels.clear(); @@ -111,6 +119,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "nexus" ) { nexusfile = value; inputFileName = value; fileroot = value; format = "nexus"; } + if (key == "clustal" ) { clustalfile = value; inputFileName = value; fileroot = value; format = "clustal"; } if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } if (key == "name" ) { namefile = value; } @@ -129,8 +139,15 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "step") { step = value; } if (key == "form") { form = value; } if (key == "sorted") { sorted = value; } + if (key == "vertical") { vertical = value; } + if (key == "trump") { trump = value; } + if (key == "filter") { filter = value; } + if (key == "soft") { soft = value; } if (key == "scale") { scale = value; } + + + if (key == "line") {//stores lines to be used in a vector lines.clear(); @@ -163,6 +180,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ //input defaults for calculators if (commandName == "collect.single") { + if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } Estimators.clear(); splitAtDash(calc, Estimators); @@ -173,6 +191,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ splitAtDash(calc, Estimators); } if (commandName == "collect.shared") { + if ((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } Estimators.clear(); splitAtDash(calc, Estimators); @@ -239,6 +258,8 @@ string GlobalData::getOrderFile() { return orderfile; } string GlobalData::getTreeFile() { return treefile; } string GlobalData::getSharedFile() { return sharedfile; } string GlobalData::getFastaFile() { return fastafile; } +string GlobalData::getNexusFile() { return nexusfile; } +string GlobalData::getClustalFile() { return clustalfile; } string GlobalData::getCutOff() { return cutoff; } string GlobalData::getFormat() { return format; } string GlobalData::getPrecision() { return precision; } @@ -253,7 +274,11 @@ string GlobalData::getGroups() { return groups; } string GlobalData::getStep() { return step; } string GlobalData::getForm() { return form; } string GlobalData::getSorted() { return sorted; } +string GlobalData::getTrump() { return trump; } +string GlobalData::getSoft() { return soft; } +string GlobalData::getFilter() { return filter; } string GlobalData::getScale() { return scale; } + void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} @@ -289,6 +314,8 @@ void GlobalData::clear() { groupfile = ""; orderfile = ""; fastafile = ""; + nexusfile = ""; + clustalfile = ""; treefile = ""; sharedfile = ""; cutoff = "10.00"; @@ -307,7 +334,12 @@ void GlobalData::clear() { step = "0.01"; form = "integral"; sorted = "T"; //F means don't sort, T means sort. - scale = "log10"; + vertical = ""; + trump = ""; + filter = ""; + soft = ""; + scale = "log10"; + } //*******************************************************/ diff --git a/globaldata.hpp b/globaldata.hpp index cc13bae..40b8758 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -39,7 +39,7 @@ public: GroupMap* gGroupmap; FullMatrix* gMatrix; TreeMap* gTreemap; - string inputFileName, helpRequest, commandName; + string inputFileName, helpRequest, commandName, vertical; bool allLines; vector Estimators, Groups; //holds estimators to be used set lines; //hold lines to be used @@ -55,6 +55,8 @@ public: string getGroupFile(); string getOrderFile(); string getFastaFile(); + string getNexusFile(); + string getClustalFile(); string getTreeFile(); string getSharedFile(); string getCutOff(); @@ -71,8 +73,15 @@ public: string getStep(); string getForm(); string getSorted(); + + string getTrump(); + string getSoft(); + string getFilter(); + + string getScale(); + void setListFile(string); void setPhylipFile(string); void setColumnFile(string); @@ -97,8 +106,10 @@ public: private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups; - string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, scale; + + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, line, label, randomtree, groups; + string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale; + static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor diff --git a/goodscoverage.cpp b/goodscoverage.cpp new file mode 100644 index 0000000..07b78b3 --- /dev/null +++ b/goodscoverage.cpp @@ -0,0 +1,39 @@ +/* + * goodscoverage.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "goodscoverage.h" +#include "calculator.h" + +/**********************************************************/ + +EstOutput GoodsCoverage::getValues(SAbundVector* rank){ + try { + data.resize(1,0); + + double numSingletons = rank->get(1); + double totalIndividuals = rank->getNumSeqs(); + + data[0] = 1 - numSingletons/totalIndividuals; + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GoodsCoverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GoodsCoverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + diff --git a/goodscoverage.h b/goodscoverage.h new file mode 100644 index 0000000..b4065c0 --- /dev/null +++ b/goodscoverage.h @@ -0,0 +1,31 @@ +#ifndef GOODSCOVERAGE_H +#define GOODSCOVERAGE_H + +/* + * goodscoverage.h + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +#include "calculator.h" + +/* This class implements the LogSD estimator on single group. +It is a child of the calculator class. */ + +/***********************************************************************/ + +class GoodsCoverage : public Calculator { + +public: + GoodsCoverage() : Calculator("goodscoverage", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; + +private: +}; + +/***********************************************************************/ + +#endif diff --git a/logsd.cpp b/logsd.cpp index e959ab8..782af2a 100644 --- a/logsd.cpp +++ b/logsd.cpp @@ -35,8 +35,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ double x = .5; double step = .4999999999; - while(fabs(snRatio - logS(x)) > .00001) //This uses a binary search to find the value of x. - { + while(fabs(snRatio - logS(x)) > .00001) { //This uses a binary search to find the value of x. if(logS(x) > snRatio) x += step; else @@ -51,15 +50,12 @@ EstOutput LogSD::getValues(SAbundVector* rank){ double octSumExp = 0; double sumExp = 0; double maxDiff = 0; - for(int y = 1; y < rank->size(); y++) - { - if(y - .5 < pow(2.0, oct)) - { + for(int y = 1; y < rank->size(); y++) { + if(y - .5 < pow(2.0, oct)) { octSumObs += rank->get(y); octSumExp += alpha*pow(x,y)/(y); } - else - { + else { sumObs += octSumObs; octSumObs = rank->get(y); @@ -68,8 +64,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ oct++; } - if(y == rank->size()-1) - { + if(y == rank->size()-1) { sumObs += octSumObs; sumExp += octSumExp; } @@ -85,6 +80,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ cout << ".05 confidence value = " << .89196/sqrt(numSpec) << "\n"; cout << "If D Test Statistic is greater than the critical value then the data fits the Log Series Distribution model w/ 95% confidence.\n\n";*/ + data[0] = (maxDiff + .5)/numSpec; data[1] = 0.886/sqrt(numSpec); data[2] = 1.031/sqrt(numSpec); @@ -96,11 +92,11 @@ EstOutput LogSD::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the LogSD class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the LogSD class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/qstat.cpp b/qstat.cpp index a2f821e..ed4471a 100644 --- a/qstat.cpp +++ b/qstat.cpp @@ -33,20 +33,17 @@ EstOutput QStat::getValues(SAbundVector* rank){ int r3Ind = 0; int sumSpec = 0; int iqSum = 0; - for(int i = 1; i < rank->size(); i++) - { + for(int i = 1; i < rank->size(); i++) { if(r1 != -1 && r3 != -1) i = rank->size(); sumSpec += rank->get(i); - if(r1 == -1 && sumSpec >= numSpec*.25) - { + if(r1 == -1 && sumSpec >= numSpec*.25) { r1 = rank->get(i); r1Ind = i; } - else if(r3 == -1 && sumSpec >= numSpec*.75) - { + else if(r3 == -1 && sumSpec >= numSpec*.75) { r3 = rank->get(i); r3Ind = i; } @@ -63,11 +60,11 @@ EstOutput QStat::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the QStat class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the QStat class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/readclustal.cpp b/readclustal.cpp new file mode 100644 index 0000000..2211ccd --- /dev/null +++ b/readclustal.cpp @@ -0,0 +1,80 @@ +/* + * readclustal.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readclustal.h" +#include +#include + +/*******************************************************************************/ +ReadClustal::ReadClustal(string file) { + try { + openInputFile(file, filehandle); + clustalFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadClustal::~ReadClustal(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadClustal::read() { + string temp; + string name; + string sequence; + string firstName = ""; + for(int i = 0; i < 6; i++) + filehandle >> temp; + + int count = 0; + int numSeqs = 0; + bool firstDone = false; + + while(!filehandle.eof()) { + filehandle >> name; + if(numSeqs != 0) { + if(count == numSeqs) + count = 0; + } + else if(!firstDone && firstName.compare("") == 0) + firstName = name; + else if(!firstDone && firstName.compare(name) == 0) { + numSeqs = count; + firstDone = true; + count = 0; + } + + if(name.find_first_of("*") == -1) { + filehandle >> sequence; + if(!firstDone) { + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + } + else + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + + count++; + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadClustal::getDB() { + return &sequencedb; +} diff --git a/readclustal.h b/readclustal.h new file mode 100644 index 0000000..8556486 --- /dev/null +++ b/readclustal.h @@ -0,0 +1,38 @@ +#ifndef READCLUSTAL_H +#define READCLUSTAL_H + +/* + * readclustal.h + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadClustal { + + public: + ReadClustal(string); + ~ReadClustal(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string clustalFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readdistcommand.cpp b/readdistcommand.cpp index f19bf11..0e45ad5 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -10,6 +10,7 @@ #include "readdistcommand.h" #include "readphylip.h" #include "readcolumn.h" +#include "readmatrix.hpp" ReadDistCommand::ReadDistCommand(){ try { diff --git a/readfasta.cpp b/readfasta.cpp new file mode 100644 index 0000000..ff39e01 --- /dev/null +++ b/readfasta.cpp @@ -0,0 +1,68 @@ +/* + * readfasta.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/21/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readfasta.h" +#include +#include + +/*******************************************************************************/ +ReadFasta::ReadFasta(string file) { + try { + openInputFile(file, filehandle); + fastaFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadFasta::~ReadFasta(){ + //for(int i = 0; i < sequencedb.getNumSeqs(); i++) + //delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadFasta::read() { + string name = ""; + string sequence = ""; + string temp; + int count = 0; + while(!filehandle.eof()){ + if(count == 0) + filehandle >> temp; + if(temp.substr(0,1).compare(">") == 0) { + if(count != 0) { + Sequence newSequence(name, sequence); + sequencedb.add(newSequence); + sequence = ""; + } + else + count++; + name = temp.substr(1,temp.length()-1); + } + else + sequence += temp; + + filehandle >> temp; + } + Sequence newSequence(name, sequence); + sequencedb.add(newSequence); + + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadFasta::getDB() { + return &sequencedb; +} diff --git a/readfasta.h b/readfasta.h new file mode 100644 index 0000000..a4fe436 --- /dev/null +++ b/readfasta.h @@ -0,0 +1,39 @@ +#ifndef READFASTA_H +#define READFASTA_H + +/* + * readfasta.h + * Mothur + * + * Created by Thomas Ryabin on 4/21/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadFasta { + + public: + ReadFasta(string); + ~ReadFasta(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string fastaFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readnexus.cpp b/readnexus.cpp new file mode 100644 index 0000000..bc91e16 --- /dev/null +++ b/readnexus.cpp @@ -0,0 +1,75 @@ +/* + * readnexusaln.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readnexus.h" +#include +#include + +/*******************************************************************************/ +ReadNexus::ReadNexus(string file) { + try { + openInputFile(file, filehandle); + nexusFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadNexus::~ReadNexus(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadNexus::read() { + string temp; + string name; + string sequence; + for(int i = 0; i < 5; i++) + filehandle >> temp; + + int numSeqs = atoi(temp.substr(temp.find_first_of("=")+1, temp.length() - temp.find_first_of("=") - 1).c_str()); + + for(int i = 0; i < 9; i++) + filehandle >> temp; + + int count = 0; + bool firstDone = false; + while(!filehandle.eof()){ + filehandle >> name; + filehandle >> sequence; + if(name.compare(";") != 0) { + if(!firstDone) { + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + } + else + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + + count++; + if(count == numSeqs) { + if(!firstDone) + firstDone = true; + count = 0; + } + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadNexus::getDB() { + return &sequencedb; +} diff --git a/readnexus.h b/readnexus.h new file mode 100644 index 0000000..6cd4973 --- /dev/null +++ b/readnexus.h @@ -0,0 +1,38 @@ +#ifndef READNEXUS_H +#define READNEXUS_H + +/* + * readnuxus.h + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadNexus { + + public: + ReadNexus(string); + ~ReadNexus(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string nexusFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readnexusal.h b/readnexusal.h new file mode 100644 index 0000000..455044a --- /dev/null +++ b/readnexusal.h @@ -0,0 +1,38 @@ +#ifndef READNEXUSALN_H +#define READNEXUSALN_H + +/* + * readnexusaln.h + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "utilities.hpp" + +/**********************************************************************************/ + +class ReadNexus { + + public: + ReadNexus(string); + ~ReadNexus(); + void read(); + + private: + GlobalData* globaldata; + string nexusFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readphylip.cpp b/readphylip.cpp index 174b7e3..bc39b52 100644 --- a/readphylip.cpp +++ b/readphylip.cpp @@ -13,174 +13,173 @@ /***********************************************************************/ ReadPhylipMatrix::ReadPhylipMatrix(string distFile){ - - successOpen = openInputFile(distFile, fileHandle); - + + successOpen = openInputFile(distFile, fileHandle); + } /***********************************************************************/ void ReadPhylipMatrix::read(NameAssignment* nameMap){ - try { - - float distance; - int square, nseqs; - string name; - vector matrixNames; - - fileHandle >> nseqs >> name; + try { + + float distance; + int square, nseqs; + string name; + vector matrixNames; + + fileHandle >> nseqs >> name; - matrixNames.push_back(name); + matrixNames.push_back(name); - if(nameMap == NULL){ - list = new ListVector(nseqs); - list->set(0, name); - } - else{ - list = new ListVector(nameMap->getListVector()); - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - } - - char d; - while((d=fileHandle.get()) != EOF){ - - if(isalnum(d)){ - square = 1; - fileHandle.putback(d); - for(int i=0;i> distance; - } - break; - } - if(d == '\n'){ - square = 0; - break; - } - } - - Progress* reading; - - if(square == 0){ + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + } + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + + if(square == 0){ - reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); - - int index = 0; - - for(int i=1;i> name; - matrixNames.push_back(name); - - //there's A LOT of repeated code throughout this method... - if(nameMap == NULL){ - list->set(i, name); - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff){ - PCell value(i, j, distance); - D->addCell(value); - } - index++; - reading->update(index); - } - - } - else{ - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff){ - PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); - D->addCell(value); - } - index++; - reading->update(index); - } - } - } - } - else{ + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;i> name; + matrixNames.push_back(name); + + //there's A LOT of repeated code throughout this method... + if(nameMap == NULL){ + list->set(i, name); + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + PCell value(i, j, distance); + D->addCell(value); + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); + D->addCell(value); + } + index++; + reading->update(index); + } + } + } + } + else{ - reading = new Progress("Reading matrix: ", nseqs * nseqs); - - int index = nseqs; - - for(int i=1;i> name; - matrixNames.push_back(name); - - if(nameMap == NULL){ - list->set(i, name); - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && j < i){ - PCell value(i, j, distance); - D->addCell(value); - } - index++; - reading->update(index); - } - - } - else{ - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && j < i){ - PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); - D->addCell(value); - } - index++; - reading->update(index); - } - } - } - } - reading->finish(); - delete reading; + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + if(nameMap == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + PCell value(i, j, distance); + D->addCell(value); + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); + D->addCell(value); + } + index++; + reading->update(index); + } + } + } + } + reading->finish(); + delete reading; - list->setLabel("0"); - fileHandle.close(); + list->setLabel("0"); + fileHandle.close(); - if(nameMap != NULL){ - for(int i=0;ierase(matrixNames[i]); - } - if(nameMap->size() > 0){ - //should probably tell them what is missing if we missed something - cout << "missed something" << '\t' << nameMap->size() << endl; - } - } + if(nameMap != NULL){ + for(int i=0;ierase(matrixNames[i]); + } + if(nameMap->size() > 0){ + //should probably tell them what is missing if we missed something + cout << "missed something" << '\t' << nameMap->size() << endl; + } + } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } } /***********************************************************************/ ReadPhylipMatrix::~ReadPhylipMatrix(){ - delete D; - delete list; + delete D; + delete list; } - diff --git a/readseqscommand.cpp b/readseqscommand.cpp new file mode 100644 index 0000000..919855b --- /dev/null +++ b/readseqscommand.cpp @@ -0,0 +1,80 @@ +/* + * readseqscommand.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readseqscommand.h" + +//********************************************************************************************************************** +ReadSeqsCommand::ReadSeqsCommand(){ + try { + globaldata = GlobalData::getInstance(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadOtuCommand class function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + +ReadSeqsCommand::~ReadSeqsCommand(){ + //delete readFasta->getDB(); +// delete readNexus->getDB(); +// delete readClustal->getDB(); +// delete readPhylip->getDB(); +} + +//********************************************************************************************************************** + +int ReadSeqsCommand::execute(){ + try { + filebuf fb; + + //fb.open ("fasta.txt",ios::out); +// readFasta->read(); +// SequenceDB* db = readFasta->getDB(); + + //fb.open("nexus.txt",ios::out); +// readNexus->read(); +// SequenceDB* db = readNexus->getDB(); + + //fb.open("clustal.txt",ios::out); +// readClustal->read(); +// SequenceDB* db = readClustal->getDB(); + + //fb.open("phylip.txt",ios::out); +// readPhylip->read(); +// SequenceDB* db = readPhylip->getDB(); + + + + //for(int i = 0; i < db->size(); i++) { +// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n"; +// } + + //ostream os(&fb); +// db->print(os); +// fb.close(); + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadOtuCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** diff --git a/readseqscommand.h b/readseqscommand.h new file mode 100644 index 0000000..660eb3f --- /dev/null +++ b/readseqscommand.h @@ -0,0 +1,55 @@ +#ifndef READSEQSCOMMAND_H +#define READSEQSCOMMAND_H +/* + * readseqscommand.h + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "command.hpp" +#include "readfasta.h" +#include "readnexus.h" +#include "readclustal.h" +#include "readseqsphylip.h" +#include "inputdata.h" +#include "groupmap.h" +#include "sharedcommand.h" +#include "parselistcommand.h" + +/* The read.otu must be run before you execute a collect.single, rarefaction.single, summary.single, +collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund +upon completion of the cluster command or you may use your own. The read.otu command parameter options are +listfile, rabundfile, sabundfile, groupfile and orderfile. The reaad.otu command can be used in two ways. +The first is to read a listfile, rabundfile or sabundfile and run the collect.single, rarefaction.single or summary.single. +For this use the read.otu command should be in the following format: read.otu(listfile=yourListFile, orderfile=yourOrderFile). +The listfile, rabundfile or sabundfile parameter is required, but you may only use one of them. +The second way to use the read.otu command is to read a listfile and a groupfile so you can use the collect.shared, +rarefaction.shared or summary.shared commands. In this case the read.otu command should be in the following format: +read.otu(listfile=yourListFile, groupfile=yourGroupFile). The listfile parameter and groupfile paramaters are required. +When using the command the second way read.otu command parses the .list file and separates it into groups. +It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. */ + +class GlobalData; + +class ReadSeqsCommand : public Command { +public: + ReadSeqsCommand(); + ~ReadSeqsCommand(); + int execute(); + +private: + GlobalData* globaldata; + ReadFasta* readFasta; + ReadNexus* readNexus; + ReadClustal* readClustal; + ReadPhylip* readPhylip; + InputData* input; + Command* shared; + Command* parselist; + string filename; +}; + +#endif diff --git a/readseqsphylip.cpp b/readseqsphylip.cpp new file mode 100644 index 0000000..b6be40e --- /dev/null +++ b/readseqsphylip.cpp @@ -0,0 +1,128 @@ +/* + * readphylip.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readseqsphylip.h" +#include +#include + +/*******************************************************************************/ +bool ReadPhylip::isSeq(string seq) { + string validChars[] = {"A","G","C","T","U","N","-"}; + + for(int i = 0; i < seq.length(); i++) { + bool valid = false; + string c = seq.substr(i,1); + for(int k = 0; k < 7; k++) + if(c.compare(validChars[k]) == 0) { + valid = true; + k = 7; + } + if(!valid) + return false; + } + + return true; +} + +/*******************************************************************************/ +ReadPhylip::ReadPhylip(string file) { + try { + openInputFile(file, filehandle); + phylipFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadPhylip::~ReadPhylip(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadPhylip::read() { + string temp; + string name; + string sequence; + + int count = 0; + int letterCount = 0; + int numCols = 0; + filehandle >> temp; + int numSeqs = atoi(temp.c_str()); + filehandle >> temp; + int numLetters = atoi(temp.c_str()); + + bool firstDone = false; + bool last = false; + filehandle >> name; + + while(!filehandle.eof()) { + if(!firstDone) { + sequence = ""; + if(count == 0) { + filehandle >> temp; + while(isSeq(temp)) { + sequence += temp; + numCols++; + filehandle >> temp; + } + letterCount += sequence.length(); + } + else { + for(int i = 0; i < numCols; i++) { + filehandle >> temp; + sequence += temp; + } + if(count < numSeqs-1) + filehandle >> temp; + } + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + if(count < numSeqs-1) + name = temp; + } + else { + sequence = ""; + for(int i = 0; i < numCols; i++) { + filehandle >> temp; + sequence += temp; + if(count == 0) + letterCount += temp.length(); + if(letterCount == numLetters && count == 0) { + numCols = i + 1; + i = numCols; + } + } + if(!(last && count == 0)) + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + if(letterCount == numLetters && count == 0) + last = true; + } + + count++; + + if(count == numSeqs) { + firstDone = true; + count = 0; + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadPhylip::getDB() { + return &sequencedb; +} diff --git a/readseqsphylip.h b/readseqsphylip.h new file mode 100644 index 0000000..c08c977 --- /dev/null +++ b/readseqsphylip.h @@ -0,0 +1,38 @@ +#ifndef READPHYLIP_H +#define READPHYLIP_H + +/* + * readphylip.h + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadPhylip { + + public: + ReadPhylip(string); + ~ReadPhylip(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string phylipFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + bool isSeq(string); +}; + +#endif \ No newline at end of file diff --git a/sequence.cpp b/sequence.cpp index 7bc8b05..b59363e 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -11,50 +11,29 @@ using namespace std; #include "sequence.hpp" -//******************************************************************************************************************** - -Sequence::Sequence(){} +/***********************************************************************/ -//******************************************************************************************************************** - -Sequence::Sequence(ifstream& fastaFile){ - - string accession; - fastaFile >> accession; - setName(accession); +Sequence::Sequence() {} - char letter; - string sequence; - - while(fastaFile && letter != '>'){ - - letter = fastaFile.get(); - - if(isalpha(letter)){ - - sequence += letter; - - } - - } - fastaFile.putback(letter); +/***********************************************************************/ - if(sequence.find_first_of('-') != string::npos){ +Sequence::Sequence(string newName, string sequence) { + name = newName; + if(sequence.find_first_of('-') != string::npos) { setAligned(sequence); } setUnaligned(sequence); } - //******************************************************************************************************************** -string Sequence::convert2ints(){ +string Sequence::convert2ints() { if(unaligned == "") { /* need to throw an error */ } string processed; - for(int i=0;i') { name = seqName.substr(1); } else { name = seqName; } } @@ -76,14 +55,14 @@ void Sequence::setName(string seqName){ void Sequence::setUnaligned(string sequence){ - if(sequence.find_first_of('-') != string::npos){ + if(sequence.find_first_of('-') != string::npos) { string temp = ""; - for(int j=0;j aligned.length()) + return unaligned.length(); + return aligned.length(); +} + +//******************************************************************************************************************** + +void Sequence::printSequence(ostream& out){ + string toPrint = unaligned; + if(aligned.length() > unaligned.length()) + toPrint = aligned; + out << ">" << name << "\n" << toPrint << "\n"; +} + +//******************************************************************************************************************** diff --git a/sequence.hpp b/sequence.hpp index e094fb5..03cbab7 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -19,21 +19,29 @@ using namespace std; class Sequence { public: Sequence(); - Sequence(ifstream&); + Sequence(string, string); + void setName(string); void setUnaligned(string); void setPairwise(string); void setAligned(string); + void setLength(); + string convert2ints(); - string getSeqName(); + string getName(); string getAligned(); string getPairwise(); string getUnaligned(); + int getLength(); + void printSequence(ostream&); + private: string name; string unaligned; - string pairwise; string aligned; + string pairwise; + int length; + int lengthAligned; }; /**************************************************************************************************/ diff --git a/sequencedb.cpp b/sequencedb.cpp new file mode 100644 index 0000000..e8aade7 --- /dev/null +++ b/sequencedb.cpp @@ -0,0 +1,94 @@ +/* + * sequencedb.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sequencedb.h" +#include "sequence.hpp" +#include "mothur.h" +#include "calculator.h" + + +/***********************************************************************/ + +SequenceDB::SequenceDB() {} + +/***********************************************************************/ + +SequenceDB::SequenceDB(int newSize) { + data.resize(newSize); +} + +/***********************************************************************/ + +SequenceDB::SequenceDB(ifstream&) {} + +/***********************************************************************/ + +int SequenceDB::getNumSeqs() { + return data.size(); +} + +/***********************************************************************/ + +void SequenceDB::set(int index, string newUnaligned) { + Sequence newSeq(data[index].getName(), newUnaligned); + data[index] = newSeq; +} + +/***********************************************************************/ + +void SequenceDB::set(int index, Sequence newSeq) { + data[index] = newSeq; +} + +/***********************************************************************/ + +Sequence SequenceDB::get(int index) { + return data[index]; +} + +/***********************************************************************/ + +void SequenceDB::changeSize(int newSize) { + data.resize(newSize); +} + +/***********************************************************************/ + +void SequenceDB::clear() { + data.clear(); +} + +/***********************************************************************/ + +int SequenceDB::size() { + return data.size(); +} + +/***********************************************************************/ + +void SequenceDB::print(ostream& out) { + for(int i = 0; i < data.size(); i++) + data[i].printSequence(out); +} + +/***********************************************************************/ + +void SequenceDB::add(Sequence newSequence) { + try { + data.push_back(newSequence); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the RAbundVector class Function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the RAbundVector class function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} \ No newline at end of file diff --git a/sequencedb.h b/sequencedb.h new file mode 100644 index 0000000..f31fc32 --- /dev/null +++ b/sequencedb.h @@ -0,0 +1,46 @@ +#ifndef SEQUENCEDB_H +#define SEQUENCEDB_H + +/* + * sequencedb.h + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + + +using namespace std; + +#include "sequence.hpp" +#include "calculator.h" + + + + +class SequenceDB { + +public: + SequenceDB(); + SequenceDB(int); //makes data that size + SequenceDB(ifstream&); //reads file to fill data +// ~SequenceDB(); //loops through data and delete each sequence + + int getNumSeqs(); + + void set(int, string); //unaligned - should also set length + void set(int, Sequence); //unaligned - should also set length + Sequence get(int); //returns sequence name at that location + void add(Sequence); //adds unaligned sequence + void changeSize(int); //resizes data + void clear(); //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak + int size(); //returns datas size + void print(ostream&); //loops through data using sequence class print + +private: + vector data; + +}; + +#endif diff --git a/sharedjackknife.cpp b/sharedjackknife.cpp new file mode 100644 index 0000000..c267a07 --- /dev/null +++ b/sharedjackknife.cpp @@ -0,0 +1,155 @@ +/* + * sharedjackknife.cpp + * Mothur + * + * Created by Thomas Ryabin on 3/30/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sharedjackknife.h" + +/*************************************************************************************** + +***************************************************************************************/ +double SharedJackknife::simpson(int abunds[], double numInd, int numBins){ + double denom = numInd*(numInd-1); + double sum = 0; + for(int i = 0; i < numBins; i++) + sum += (double)abunds[i]*((double)abunds[i]-1)/denom; + + return sum; +} + +/*****************************************************************************************/ + +double* SharedJackknife::jackknife(){ + int numBins = groups.at(0)->getNumBins()-1; + int cArray[numBins]; + for(int i = 0; i < numBins; i++) + cArray[i] = 0; + + double numInd = 0; + for(int i = 0; i < numGroups; i++) + for(int j = 0; j < numBins; j++) { + int curAbund = groups.at(i)->get(j+1).abundance; + cArray[j] += curAbund; + numInd += (double)curAbund; + } + + double baseD = 1/simpson(cArray, numInd, numBins); + + double pseudoVals[numBins]; + double jackknifeEstimate = 0; + for(int i = 0; i < numGroups; i++) { + for(int j = 0; j < numBins-1; j++) { + int abundDiff = -groups.at(i)->get(j+1).abundance; + if(i > 0) + abundDiff += groups.at(i-1)->get(j+1).abundance; + + cArray[j] += abundDiff; + numInd += abundDiff; + } + + double curD = 1/simpson(cArray, numInd, numBins); + pseudoVals[i] = (double)numGroups*(baseD - curD) + curD; + jackknifeEstimate += pseudoVals[i]; + } + jackknifeEstimate /= (double)numGroups; + + double variance = 0; + for(int i = 0; i < numGroups; i++) + variance += pow(pseudoVals[i]-jackknifeEstimate, 2); + + variance /= (double)numGroups*((double)numGroups-1); + double stErr = sqrt(variance); + TDTable table; + double confLimit = 0; + if(numGroups <= 30) + confLimit = table.getConfLimit(numGroups-1, 1); + else + confLimit = 1.645; + + confLimit *= stErr; + + double* rdata = new double[3]; + rdata[0] = baseD; + rdata[1] = jackknifeEstimate - confLimit; + rdata[2] = jackknifeEstimate + confLimit; + + return rdata; +} + +/************************************************************************************************ +************************************************************************************************/ + +EstOutput SharedJackknife::getValues(vector vectorShared){ //Fix this for collect, mistake was that it was made with summary in mind. + try { + SharedRAbundVector* shared1 = vectorShared[0]; + SharedRAbundVector* shared2 = vectorShared[1]; + if(numGroups == -1) { + globaldata = GlobalData::getInstance(); + numGroups = globaldata->Groups.size(); + } + + if(callCount == numGroups*(numGroups-1)/2) { + currentCallDone = true; + callCount = 0; + } + callCount++; + + if(currentCallDone) { + groups.clear(); + currentCallDone = false; + } + + if(groups.size() != numGroups) { + if(groups.size() == 0) + groups.push_back(shared1); + groups.push_back(shared2); + } + + if(groups.size() == numGroups && callCount < numGroups) { + data.resize(3,0); + + double* rdata = jackknife(); + data[0] = rdata[0]; + data[1] = rdata[1]; + data[2] = rdata[2]; + + //cout << "sT = " << data[0] << " lower confLimit = " << data[1] << " upper confLimit = " << data[2] << "\n"; + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[0])) { data[2] = 0; } + + /*for(int i = 0; i < groups.size(); i++) + cout << groups.at(i)->getGroup() << " "; + cout << "\n"; + cout << groups.size() << " " << data[0] << " " << data[1] << " " << data[2] << "\n\n";*/ + + return data; + } + + data.resize(3,0); + data[0] = 0; + data[1] = 0; + data[2] = 0; + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } + return data; + } + + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedJackknife class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedJackknife class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + diff --git a/sharedjackknife.h b/sharedjackknife.h new file mode 100644 index 0000000..00c0de0 --- /dev/null +++ b/sharedjackknife.h @@ -0,0 +1,40 @@ +#ifndef SHAREDJACKKNIFE_H +#define SHAREDJACKKNIFE_H + +/* + * sharedjackknife.h + * Mothur + * + * Created by Thomas Ryabin on 3/30/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" +#include "globaldata.hpp" + +/*This class implements the SharedJackknife estimator. +It is a child of the calculator class.*/ + +/***********************************************************************/ + +class SharedJackknife : public Calculator { + +public: + SharedJackknife() : numGroups(-1), callCount(0), count(0), currentCallDone(true), Calculator("sharedjackknife", 3, false) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); + +private: + GlobalData* globaldata; + int numGroups, callCount, count; + bool currentCallDone; + vector groups; + double simpson(int[], double, int); + double* jackknife(); +}; + +/***********************************************************************/ + +#endif + diff --git a/sharedkstest.cpp b/sharedkstest.cpp index fefe14f..f65f271 100644 --- a/sharedkstest.cpp +++ b/sharedkstest.cpp @@ -13,8 +13,8 @@ EstOutput KSTest::getValues(vector shared){ try { - data.resize(2,0); - + data.resize(3,0); + //Must return shared1 and shared2 to original order at conclusion of kstest vector initData1 = shared[0]->getData(); vector initData2 = shared[1]->getData(); @@ -59,6 +59,8 @@ EstOutput KSTest::getValues(vector shared){ data[0] = DStatistic; data[1] = critVal; + data[2] = 0; + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } diff --git a/sharedmarczewski.cpp b/sharedmarczewski.cpp new file mode 100644 index 0000000..be1a2fb --- /dev/null +++ b/sharedmarczewski.cpp @@ -0,0 +1,50 @@ +/* + * sharedmarczewski.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sharedmarczewski.h" + +EstOutput SharedMarczewski::getValues(vector vectorShared){ + try { + SharedRAbundVector* shared1 = vectorShared[0]; + SharedRAbundVector* shared2 = vectorShared[1]; + + data.resize(1,0); + + double a = 0; + double b = 0; + double c = 0; + for(int i = 1; i < shared1->size(); i++) + { + int abund1 = shared1->get(i).abundance; + int abund2 = shared2->get(i).abundance; + + if(abund1 > 0 && abund2 > 0) + a++; + else if(abund1 > 0 && abund2 == 0) + b++; + else if(abund1 == 0 && abund2 > 0) + c++; + } + data[0] = (b+c)/(a+b+c); + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedMarczewski class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedMarczewski class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ diff --git a/sharedmarczewski.h b/sharedmarczewski.h new file mode 100644 index 0000000..55bc7e4 --- /dev/null +++ b/sharedmarczewski.h @@ -0,0 +1,28 @@ +#ifndef SHAREDMARCZEWSKI_H +#define SHAREDMARCZEWSKI_H + +/* + * sharedmarczewski.h + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class SharedMarczewski : public Calculator { + +public: + SharedMarczewski() : Calculator("sharedmarczewski", 1, false) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); +private: +}; + +/***********************************************************************/ + +#endif \ No newline at end of file diff --git a/summarycommand.cpp b/summarycommand.cpp index 39972ce..bd8deb1 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -22,6 +22,7 @@ #include "qstat.h" #include "bergerparker.h" #include "bstick.h" +#include "goodscoverage.h" #include "coverage.h" //********************************************************************************************************************** @@ -67,6 +68,8 @@ SummaryCommand::SummaryCommand(){ sumCalculators.push_back(new Bootstrap()); }else if (globaldata->Estimators[i] == "nseqs") { sumCalculators.push_back(new NSeqs()); + }else if (globaldata->Estimators[i] == "goodscoverage") { + sumCalculators.push_back(new GoodsCoverage()); } } } diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index b3e6650..a4a345e 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -29,6 +29,8 @@ #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "sharedwhittaker.h" //********************************************************************************************************************** diff --git a/validcalculator.cpp b/validcalculator.cpp index 70299cb..f6fe280 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -186,9 +186,9 @@ void ValidCalculators::initialSingle() { single["logseries"] = "logseries"; single["qstat"] = "qstat"; single["bstick"] = "bstick"; + single["goodscoverage"] = "goodscoverage"; single["nseqs"] = "nseqs"; single["coverage"] = "coverage"; - single["default"] = "default"; } catch(exception& e) { @@ -280,6 +280,7 @@ void ValidCalculators::initialSummary() { summary["qstat"] = "qstat"; summary["bstick"] = "bstick"; summary["nseqs"] = "nseqs"; + summary["goodscoverage"]= "goodscoverage"; summary["coverage"] = "coverage"; summary["default"] = "default"; } diff --git a/validcommands.cpp b/validcommands.cpp index 94845c5..4c72dce 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -17,6 +17,7 @@ ValidCommands::ValidCommands() { commands["read.dist"] = "read.dist"; commands["read.otu"] = "read.otu"; commands["read.tree"] = "read.tree"; + commands["read.seqs"] = "read.seqs"; commands["bin.seqs"] = "bin.seqs"; commands["get.oturep"] = "get.oturep"; commands["cluster"] = "cluster"; @@ -40,6 +41,7 @@ ValidCommands::ValidCommands() { commands["bootstrap.shared"] = "bootstrap.shared"; commands["concensus"] = "concensus"; commands["help"] = "help"; + commands["filter.seqs"] = "filter.seqs"; commands["quit"] = "quit"; diff --git a/validparameter.cpp b/validparameter.cpp index 4234d2d..2d070cb 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -37,16 +37,13 @@ bool ValidParameters::isValidParameter(string parameter, string command, string bool valid = false; vector cParams = commandParameters[command]; int numParams = cParams.size(); - for(int i = 0; i < numParams; i++) - { - if(cParams.at(i).compare(parameter) == 0) - { + for(int i = 0; i < numParams; i++) { + if(cParams.at(i).compare(parameter) == 0) { valid = true; i = numParams; } } - if(!valid) - { + if(!valid) { cout << "'" << parameter << "' is not a valid parameter for the " << command << " command.\n"; cout << "The valid paramters for the " << command << " command are: "; for(int i = 0; i < numParams-1; i++) @@ -65,8 +62,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string vector values; splitAtDash(value, values); - for(int i = 0; i < values.size(); i++) - { + for(int i = 0; i < values.size(); i++) { value = values.at(i); valid = convertTest(value, pVal); @@ -79,12 +75,10 @@ bool ValidParameters::isValidParameter(string parameter, string command, string Special Cases *********************************************************************************************************/ - if(parameter.compare("precision") == 0) - { + if(parameter.compare("precision") == 0) { double logNum = log10((double)pVal); double diff = (double)((int)logNum - logNum); - if(diff != 0) - { + if(diff != 0) { cout << "The precision parameter can only take powers of 10 as a value (e.g. 10,1000,1000, etc.)\n"; return false; } @@ -110,8 +104,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string c = 0; else if(range.at(4).compare("only") == 0) c = 1; - else - { + else { cout << "The range can only be 'between' or 'only' the bounding numbers.\n"; return false; } @@ -120,8 +113,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string d = 0; else if(range.at(0).compare(">=") == 0 || range[3].compare("=>") == 0) d = 1; - else - { + else { cout << "The parameter value can only be '>', '>=', or '=>' the lower bounding number.\n"; return false; } @@ -130,8 +122,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string e = 0; else if(range.at(2).compare("<=") == 0 || range[4].compare("=<") == 0) e = 1; - else - { + else { cout << "The parameter value can only be '<', '<=', or '=<' the upper bounding number.\n"; return false; } @@ -141,24 +132,20 @@ bool ValidParameters::isValidParameter(string parameter, string command, string bool b0 = pVal < b; bool b1 = pVal <= b; - if(c != 1) - { - if(a != piSentinel && b == piSentinel) - { + if(c != 1) { + if(a != piSentinel && b == piSentinel) { if(d == 0) valid = a0; else valid = a1; } - else if(a == piSentinel && b != piSentinel) - { + else if(a == piSentinel && b != piSentinel) { if(e == 0) valid = b0; else valid = b1; } - else - { + else { if(d == 0 && e == 0) valid = (a0 && b0); else if(d == 0 && e == 1) @@ -169,8 +156,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string valid = (a1 && b1); } } - else - { + else { if(a != piSentinel && b == piSentinel) valid = (pVal == a); else if(a == piSentinel && b != piSentinel) @@ -180,30 +166,26 @@ bool ValidParameters::isValidParameter(string parameter, string command, string } - if(!valid) - { + if(!valid) { cout << "The '" << parameter << "' parameter needs to be "; if(c == 1) cout << "either '" << a << "' or '" << b << "'.\n"; - else - { - if(a != piSentinel) - { + else { + if(a != piSentinel) { cout << ">"; if(d != 0) cout << "="; cout << " '" << a << "'"; } if(b == piSentinel) - cout << ".\n"; + cout << "'.\n"; else if(a != piSentinel) cout << " and "; - if(b != piSentinel) - { + if(b != piSentinel) { cout << "<"; if(e != 0) cout << "="; - cout << " '" << b << ".\n"; + cout << " '" << b << "'.\n"; } } return false; @@ -238,6 +220,9 @@ void ValidParameters::initCommandParameters() { string readtreeArray[] = {"tree","group"}; commandParameters["read.tree"] = addParameters(readtreeArray, sizeof(readtreeArray)/sizeof(string)); + string readseqsArray[] = {"fasta","phylip","clustal","nexus","line"}; + commandParameters["read.seqs"] = addParameters(readseqsArray, sizeof(readseqsArray)/sizeof(string)); + string clusterArray[] = {"cutoff","precision","method"}; commandParameters["cluster"] = addParameters(clusterArray, sizeof(clusterArray)/sizeof(string)); @@ -285,6 +270,9 @@ void ValidParameters::initCommandParameters() { string heatmapArray[] = {"groups","line","label","sorted","scale"}; commandParameters["heatmap"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string)); + + string filterseqsArray[] = {"trump", "soft", "filter"}; + commandParameters["filter.seqs"] = addParameters(filterseqsArray, sizeof(filterseqsArray)/sizeof(string)); string vennArray[] = {"groups","line","label","calc"}; commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); @@ -345,7 +333,7 @@ void ValidParameters::initParameterRanges() { string jumbleArray[] = {">","0", "<","1", "only"}; parameterRanges["jumble"] = addParameters(jumbleArray, rangeSize); - string freqArray[] = {">","1", "<","NA", "between"}; + string freqArray[] = {">=","1", "<","NA", "between"}; parameterRanges["freq"] = addParameters(freqArray, rangeSize); //string lineArray[] = {">=","1", "<","NA", "between"}; @@ -353,6 +341,9 @@ void ValidParameters::initParameterRanges() { string abundArray[] = {">=","5", "<","NA", "between"}; parameterRanges["abund"] = addParameters(abundArray, rangeSize); + + string softArray[] = {">=","0", "<=","100", "between"}; + parameterRanges["soft"] = addParameters(softArray, rangeSize); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function isValidParameter. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";