From c196b6b4768ccb84955d773ff0f22e4994d1ba7b Mon Sep 17 00:00:00 2001 From: ryabin Date: Mon, 4 May 2009 22:20:46 +0000 Subject: [PATCH] *** empty log message *** --- bergerparker.cpp | 4 +- bstick.cpp | 12 +- calculator.cpp | 8 +- collect.cpp | 2 + collectcommand.cpp | 8 +- collectsharedcommand.cpp | 3 + commandfactory.cpp | 5 + errorchecking.cpp | 35 ++++- errorchecking.h | 5 +- filterseqscommand.cpp | 160 ++++++++++++++++++++ filterseqscommand.h | 48 ++++++ geom.cpp | 12 +- getlabelcommand.cpp | 6 +- getlinecommand.cpp | 6 +- globaldata.cpp | 36 ++++- globaldata.hpp | 17 ++- goodscoverage.cpp | 39 +++++ goodscoverage.h | 31 ++++ logsd.cpp | 20 +-- qstat.cpp | 13 +- readclustal.cpp | 80 ++++++++++ readclustal.h | 38 +++++ readdistcommand.cpp | 1 + readfasta.cpp | 68 +++++++++ readfasta.h | 39 +++++ readnexus.cpp | 75 ++++++++++ readnexus.h | 38 +++++ readnexusal.h | 38 +++++ readphylip.cpp | 305 +++++++++++++++++++-------------------- readseqscommand.cpp | 80 ++++++++++ readseqscommand.h | 55 +++++++ readseqsphylip.cpp | 128 ++++++++++++++++ readseqsphylip.h | 38 +++++ sequence.cpp | 64 ++++---- sequence.hpp | 14 +- sequencedb.cpp | 94 ++++++++++++ sequencedb.h | 46 ++++++ sharedjackknife.cpp | 155 ++++++++++++++++++++ sharedjackknife.h | 40 +++++ sharedkstest.cpp | 6 +- sharedmarczewski.cpp | 50 +++++++ sharedmarczewski.h | 28 ++++ summarycommand.cpp | 3 + summarysharedcommand.cpp | 2 + validcalculator.cpp | 3 +- validcommands.cpp | 2 + validparameter.cpp | 69 ++++----- 47 files changed, 1735 insertions(+), 294 deletions(-) create mode 100644 filterseqscommand.cpp create mode 100644 filterseqscommand.h create mode 100644 goodscoverage.cpp create mode 100644 goodscoverage.h create mode 100644 readclustal.cpp create mode 100644 readclustal.h create mode 100644 readfasta.cpp create mode 100644 readfasta.h create mode 100644 readnexus.cpp create mode 100644 readnexus.h create mode 100644 readnexusal.h create mode 100644 readseqscommand.cpp create mode 100644 readseqscommand.h create mode 100644 readseqsphylip.cpp create mode 100644 readseqsphylip.h create mode 100644 sequencedb.cpp create mode 100644 sequencedb.h create mode 100644 sharedjackknife.cpp create mode 100644 sharedjackknife.h create mode 100644 sharedmarczewski.cpp create mode 100644 sharedmarczewski.h diff --git a/bergerparker.cpp b/bergerparker.cpp index c9f3565..b900ce1 100644 --- a/bergerparker.cpp +++ b/bergerparker.cpp @@ -23,11 +23,11 @@ EstOutput BergerParker::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the BergerParker class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the BergerParker class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/bstick.cpp b/bstick.cpp index eabf087..c3ff58f 100644 --- a/bstick.cpp +++ b/bstick.cpp @@ -25,8 +25,7 @@ RAbundVector BStick::getRAbundVector(SAbundVector* rank){ int nb = 0; int ns = 0; - for(int i = rank->size()-1; i > 0; i--) - { + for(int i = rank->size()-1; i > 0; i--) { int cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; @@ -55,8 +54,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ double sumObs = 0; double maxDiff = 0; - for(int i = 0; i < rdata.size(); i++) - { + for(int i = 0; i < rdata.size(); i++) { sumObs += rdata.get(i); sumExp += numInd/numSpec*invSum(i+1,numSpec); double diff = fabs(sumObs-sumExp); @@ -64,6 +62,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ maxDiff = diff; } + data[0] = maxDiff/numInd; data[1] = 0.886/sqrt(rdata.size()); data[2] = 1.031/sqrt(rdata.size()); @@ -71,6 +70,7 @@ EstOutput BStick::getValues(SAbundVector* rank){ /*cout << critVal << "\n"; cout << "If D-Statistic is less than the critical value then the data fits the Broken Stick model w/ 95% confidence.\n\n";*/ + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } @@ -78,11 +78,11 @@ EstOutput BStick::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the BStick class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the BStick class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/calculator.cpp b/calculator.cpp index 71aa99c..c45ddbf 100644 --- a/calculator.cpp +++ b/calculator.cpp @@ -863,7 +863,7 @@ double TDTable::getConfLimit(int row, int col) //Rows are the degrees of freedom //Found on http://www.vgtu.lt/leidiniai/elektroniniai/Probability.pdf/Table%203.pdf //Confidence Level .90 .95 .975 .99 .995 .999 .9995 - double values[33][7] = {{3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578}, + double values[30][7] = {{3.078, 6.314, 12.706, 31.821, 63.656, 318.289, 636.578}, {1.886, 2.920, 4.303, 6.965, 9.925, 22.328, 31.600}, {1.638, 2.353, 3.182, 4.541, 5.841, 10.214, 12.924}, {1.533, 2.132, 2.776, 3.747, 4.604, 7.173, 8.610}, @@ -892,10 +892,8 @@ double TDTable::getConfLimit(int row, int col) //Rows are the degrees of freedom {1.314, 1.703, 2.052, 2.473, 2.771, 3.421, 3.689}, {1.313, 1.701, 2.048, 2.467, 2.763, 3.408, 3.674}, {1.311, 1.699, 2.045, 2.462, 2.756, 3.396, 3.660}, - {1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646}, - {1.296, 1.671, 2.000, 2.390, 2.660, 3.232, 3.460}, - {1.289, 1.658, 1.980, 2.358, 2.617, 3.160, 3.373}, - {1.282, 1.645, 1.960, 2.326, 2.576, 3.091, 3.291}}; + {1.310, 1.697, 2.042, 2.457, 2.750, 3.385, 3.646}}; + return values[row][col]; } catch(exception& e) { diff --git a/collect.cpp b/collect.cpp index d109ca0..ee46ef5 100644 --- a/collect.cpp +++ b/collect.cpp @@ -111,7 +111,9 @@ try { //calculate at 0 and the given increment if((i == 0) || (i+1) % increment == 0){ + //how many comparisons to make i.e. for group a, b, c = ab, ac, bc. + int n = 1; for (int k = 0; k < (lookup.size() - 1); k++) { // pass cdd each set of groups to commpare for (int l = n; l < lookup.size(); l++) { diff --git a/collectcommand.cpp b/collectcommand.cpp index 2687f92..098ec1f 100644 --- a/collectcommand.cpp +++ b/collectcommand.cpp @@ -22,8 +22,12 @@ #include "logsd.h" #include "bergerparker.h" #include "bstick.h" +#include "goodscoverage.h" + + #include "coverage.h" + //********************************************************************************************************************** CollectCommand::CollectCommand(){ try { @@ -64,7 +68,9 @@ CollectCommand::CollectCommand(){ }else if (globaldata->Estimators[i] == "bergerparker") { cDisplays.push_back(new CollectDisplay(new BergerParker(), new OneColumnFile(fileNameRoot+"bergerparker"))); }else if (globaldata->Estimators[i] == "bstick") { - cDisplays.push_back(new CollectDisplay(new BStick(), new OneColumnFile(fileNameRoot+"bstick"))); + cDisplays.push_back(new CollectDisplay(new BStick(), new ThreeColumnFile(fileNameRoot+"bstick"))); + }else if (globaldata->Estimators[i] == "goodscoverage") { + cDisplays.push_back(new CollectDisplay(new GoodsCoverage(), new OneColumnFile(fileNameRoot+"goodscoverage"))); } } } diff --git a/collectsharedcommand.cpp b/collectsharedcommand.cpp index bc0609e..96e495a 100644 --- a/collectsharedcommand.cpp +++ b/collectsharedcommand.cpp @@ -29,6 +29,8 @@ #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "sharedwhittaker.h" @@ -74,6 +76,7 @@ CollectSharedCommand::CollectSharedCommand(){ cDisplays.push_back(new CollectDisplay(new Whittaker(), new SharedOneColumnFile(fileNameRoot+"whittaker"))); }else if (globaldata->Estimators[i] == "sharednseqs") { cDisplays.push_back(new CollectDisplay(new SharedNSeqs(), new SharedOneColumnFile(fileNameRoot+"shared.nseqs"))); + }else if (globaldata->Estimators[i] == "ochiai") { cDisplays.push_back(new CollectDisplay(new Ochiai(), new SharedOneColumnFile(fileNameRoot+"ochiai"))); }else if (globaldata->Estimators[i] == "anderberg") { diff --git a/commandfactory.cpp b/commandfactory.cpp index 1f6f828..e357e6c 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -11,6 +11,7 @@ #include "readdistcommand.h" #include "readtreecommand.h" #include "readotucommand.h" +#include "readseqscommand.h" #include "clustercommand.h" #include "parselistcommand.h" #include "collectcommand.h" @@ -31,6 +32,8 @@ #include "unifracweightedcommand.h" #include "libshuffcommand.h" #include "heatmapcommand.h" +#include "filterseqscommand.h" +#include "mothur.h" #include "venncommand.h" #include "nocommands.h" #include "binsequencecommand.h" @@ -63,6 +66,7 @@ Command* CommandFactory::getCommand(string commandName){ if(commandName == "read.dist") { command = new ReadDistCommand(); } else if(commandName == "read.otu") { command = new ReadOtuCommand(); } + else if(commandName == "read.seqs") { command = new ReadSeqsCommand(); } else if(commandName == "read.tree") { command = new ReadTreeCommand(); } else if(commandName == "cluster") { command = new ClusterCommand(); } else if(commandName == "deconvolute") { command = new DeconvoluteCommand(); } @@ -82,6 +86,7 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "get.line") { command = new GetlineCommand(); } else if(commandName == "libshuff") { command = new LibShuffCommand(); } else if(commandName == "heatmap") { command = new HeatMapCommand(); } + else if(commandName == "filter.seqs") { command = new FilterSeqsCommand(); } else if(commandName == "venn") { command = new VennCommand(); } else if(commandName == "bin.seqs") { command = new BinSeqCommand(); } else if(commandName == "get.oturep") { command = new GetOTURepCommand(); } diff --git a/errorchecking.cpp b/errorchecking.cpp index 728d4d2..eaf49df 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -23,6 +23,7 @@ ErrorCheck::ErrorCheck() { /******************************************************/ void ErrorCheck::refresh() { + //columnfile = globaldata->getColumnFile(); //phylipfile = globaldata->getPhylipFile(); //listfile = globaldata->getListFile(); @@ -38,6 +39,7 @@ void ErrorCheck::refresh() { //method = globaldata->getMethod(); //randomtree = globaldata->getRandomTree(); //sharedfile = globaldata->getSharedFile(); + } /*******************************************************/ @@ -74,7 +76,6 @@ bool ErrorCheck::checkInput(string input) { //is it a valid command if (validCommand->isValidCommand(commandName) != true) { return false; } - string parameter, value; //reads in parameters and values @@ -94,9 +95,11 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "name" ) { namefile = value; } if (parameter == "order" ) { orderfile = value; } if (parameter == "fasta" ) { fastafile = value; } + if (parameter == "nexus" ) { nexusfile = value; } + if (parameter == "clustal" ) { clustalfile = value; } if (parameter == "tree" ) { treefile = value; } - if (parameter == "group" ) { groupfile = value; } - if (parameter == "shared" ) { sharedfile = value; } + if (parameter == "group" ) { groupfile = value; } + if (parameter == "shared" ) { sharedfile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } if (parameter == "iters" ) { iters = value; } @@ -107,10 +110,13 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "line" ) { line = value; } if (parameter == "label" ) { label = value; } if (parameter == "abund" ) { abund = value; } - if (parameter == "random" ) { randomtree = value; } - if (parameter == "sorted" ) { sorted = value; } + if (parameter == "random" ) { randomtree = value; } + if (parameter == "sorted" ) { sorted = value; } + if (parameter == "trump" ) { trump = value; } + if (parameter == "soft" ) { soft = value; } + if (parameter == "filter" ) { filter = value; } if (parameter == "scale" ) { scale = value; } - + } //gets the last parameter and value @@ -131,6 +137,8 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "group" ) { groupfile = value; } if (parameter == "shared" ) { sharedfile = value; } if (parameter == "fasta" ) { fastafile = value; } + if (parameter == "nexus" ) { nexusfile = value; } + if (parameter == "clustal" ) { clustalfile = value; } if (parameter == "tree" ) { treefile = value; } if (parameter == "cutoff" ) { cutoff = value; } if (parameter == "precision" ) { precision = value; } @@ -144,7 +152,11 @@ bool ErrorCheck::checkInput(string input) { if (parameter == "random" ) { randomtree = value; } if (parameter == "abund" ) { abund = value; } if (parameter == "sorted" ) { sorted = value; } + if (parameter == "trump" ) { trump = value; } + if (parameter == "soft" ) { soft = value; } + if (parameter == "filter" ) { filter = value; } if (parameter == "scale" ) { scale = value; } + } } @@ -172,6 +184,8 @@ bool ErrorCheck::checkInput(string input) { } }else if (commandName == "read.tree") { validateTreeFiles(); //checks the treefile and groupfile parameters + }else if (commandName == "read.seqs") { + if ((fastafile == "") && (nexusfile == "") && (clustalfile == "") && (phylipfile == "")) { cout << "You must enter a fastafile, nexusfile, or clustalfile with the read.seqs() command." << endl; return false; } }else if (commandName == "deconvolute") { if (fastafile == "") { cout << "You must enter a fastafile with the deconvolute() command." << endl; return false; } validateReadFiles(); @@ -230,6 +244,12 @@ bool ErrorCheck::checkInput(string input) { } } + if (commandName == "filter.seqs"){ + if ((globaldata->getFastaFile() == "") && (globaldata->getNexusFile() == "") && (globaldata->getClustalFile() == "") && (globaldata->getPhylipFile() == "")) { + cout << "You must read either a fasta, nexus, clustal, or phylip file before you can use the filter.seqs command." << endl; return false; + } + } + if ((commandName == "bin.seqs")) { if ((globaldata->getListFile() == "")) { cout << "You must read a list file before you can use the bin.seqs command." << endl; return false; } validateBinFiles(); @@ -551,6 +571,9 @@ void ErrorCheck::clear() { groupfile = ""; orderfile = ""; sharedfile = ""; + fastafile = ""; + nexusfile = ""; + clustalfile = ""; line = ""; label = ""; method = "furthest"; diff --git a/errorchecking.h b/errorchecking.h index 05ce5cf..bbc3050 100644 --- a/errorchecking.h +++ b/errorchecking.h @@ -33,8 +33,9 @@ class ErrorCheck { void validateBinFiles(); void clear(); void refresh(); - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, cutoff, format; - string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, scale; + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, cutoff, format; + string precision, method, fileroot, label, line, iters, jumble, freq, single, rarefaction, shared, summary, randomtree, abund, sorted, trump, soft, filter, scale; + string commandName, optionText; bool errorFree; diff --git a/filterseqscommand.cpp b/filterseqscommand.cpp new file mode 100644 index 0000000..aff959a --- /dev/null +++ b/filterseqscommand.cpp @@ -0,0 +1,160 @@ +/* + * filterseqscommand.cpp + * Mothur + * + * Created by Thomas Ryabin on 5/4/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "filterseqscommand.h" +#include +#include + +/**************************************************************************************/ +void FilterSeqsCommand::doTrump() { + //trump = globaldata->getTrump(); +// +// for(int i = 0; i < db->size(); i++) { +// Sequence cur = db->get(i); +// string curAligned = cur.getAligned(); +// +// for(int j = 0; j < curAligned.length-1; j++) { +// string curChar = curAligned.substr(j, j+1); +// +// if(curChar.compare(trump) == 0) +// columnsToRemove[j] = true; +// } +// } +} + +/**************************************************************************************/ +void FilterSeqsCommand::doSoft() { + //soft = atoi(globaldata->getSoft().c_str()); +// vector > columnSymbolSums; +// vector > columnSymbols; +// for(int i = 0; i < db->get(0).getLength(); i++) { +// vector symbols; +// vector sums; +// columnSymbols[i] = symbols; +// columnSymbolSums[i] = sums; +// } +// +// for(int i = 0; i < db->size(); i++) { +// Sequence cur = db->get(i); +// string curAligned = cur.getAligned(); +// +// for(int j = 0; j < curAligned.length-1; j++) { +// string curChar = curAligned.substr(j, j+1); +// vector curColumnSymbols = columnSymbols[j]; +// +// bool newSymbol = true; +// +// for(int k = 0; j < curColumnSymbols.size(); j++) +// if(curChar.compare(curColumnSymbols[k]) == 0) { +// newSymbol = false; +// columnSymbolSums[j][k]++; +// } +// +// if(newSymbol) { +// columnSymbols.push_back(curChar); +// columnSymbolSums[j].push_back(1); +// } +// } +// } +// +// for(int i = 0; i < columnSymbolSums.size(); i++) { +// int totalSum = 0; +// int max = 0; +// vector curColumn = columnSymbolSums[i]; +// +// for(int j = 0; j < curColumn.size(); j++) { +// int curSum = curColumn[j]; +// if(curSum > max) +// max = curSum; +// totalSum += curSum; +// } +// +// if((double)max/(double)totalSum * 100 < soft) +// columnsToRemove[i] = true; +// } +} +void FilterSeqsCommand::doFilter() {} +/**************************************************************************************/ +int FilterSeqsCommand::execute() { + try { + globaldata = GlobalData::getInstance(); + filename = globaldata->inputFileName; + + if(globaldata->getFastaFile().compare("") != 0) { + readFasta = new ReadFasta(filename); + readFasta->read(); + db = readFasta->getDB(); + } + + else if(globaldata->getNexusFile().compare("") != 0) { + readNexus = new ReadNexus(filename); + readNexus->read(); + db = readNexus->getDB(); + } + + else if(globaldata->getClustalFile().compare("") != 0) { + readClustal = new ReadClustal(filename); + readClustal->read(); + db = readClustal->getDB(); + } + + else if(globaldata->getPhylipFile().compare("") != 0) { + readPhylip = new ReadPhylip(filename); + readPhylip->read(); + db = readPhylip->getDB(); + } + + for(int i = 0; i < db->get(0).getLength(); i++) + columnsToRemove[i] = false; + + // Trump + if(globaldata->getTrump().compare("") != 0) { + + + } + + // Soft + if(globaldata->getSoft().compare("") != 0) {} + + + + + // Filter + //if(globaldata->getFilter().compare("") != 0) { +// +// filter = globaldata->getFilter(); +// ifstream filehandle; +// openInputFile(filter, filehandle); +// +// char c; +// int count = 0; +// while(!filehandle.eof()) { +// c = filehandle.get(); +// if(c == '0') +// columnsToRemove[count] = true; +// count++; +// } +// } + + + + + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the DeconvoluteCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the DeconvoluteCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************************/ diff --git a/filterseqscommand.h b/filterseqscommand.h new file mode 100644 index 0000000..fffa457 --- /dev/null +++ b/filterseqscommand.h @@ -0,0 +1,48 @@ +#ifndef FILTERSEQSCOMMAND_H +#define FILTERSEQSCOMMAND_H + +/* + * filterseqscommand.h + * Mothur + * + * Created by Thomas Ryabin on 5/4/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "command.hpp" +#include "mothur.h" +#include "globaldata.hpp" +#include "readfasta.h" +#include "readnexus.h" +#include "readclustal.h" +#include "readseqsphylip.h" + +using namespace std; + +class FilterSeqsCommand : public Command { + +public: + FilterSeqsCommand() {}; + ~FilterSeqsCommand() {}; + int execute(); + +private: + void doTrump(); + void doSoft(); + void doFilter(); + + GlobalData* globaldata; + string filename, trump, filter; + + ReadFasta* readFasta; + ReadNexus* readNexus; + ReadClustal* readClustal; + ReadPhylip* readPhylip; + + vector columnsToRemove; + SequenceDB* db; + double soft; +}; + +#endif \ No newline at end of file diff --git a/geom.cpp b/geom.cpp index 755c1ec..a23352c 100644 --- a/geom.cpp +++ b/geom.cpp @@ -21,8 +21,7 @@ RAbundVector Geom::getRAbundVector(SAbundVector* rank){ int nb = 0; int ns = 0; - for(int i = rank->size()-1; i > 0; i--) - { + for(int i = rank->size()-1; i > 0; i--) { int cur = rank->get(i); if(mr == 1 && cur > 0) mr = i; @@ -51,8 +50,7 @@ EstOutput Geom::getValues(SAbundVector* rank){ double k = .5; double step = .49999; - while(fabs(min - numInd*kEq(k, (double)numSpec)) > .0001) //This uses a binary search to find the value of k. - { + while(fabs(min - numInd*kEq(k, (double)numSpec)) > .0001) { //This uses a binary search to find the value of k. if(numInd*kEq(k, numSpec) > min) k += step; else @@ -86,7 +84,7 @@ EstOutput Geom::getValues(SAbundVector* rank){ /*cout << critVal << "\n"; cout << "If D-Statistic is less than the critical value then the data fits the Geometric Series model w/ 95% confidence.\n\n";*/ - + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } @@ -94,11 +92,11 @@ EstOutput Geom::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the Geom class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the Geom class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/getlabelcommand.cpp b/getlabelcommand.cpp index 1e27267..6d92b8e 100644 --- a/getlabelcommand.cpp +++ b/getlabelcommand.cpp @@ -41,12 +41,10 @@ int GetlabelCommand::execute(){ string label; int numBins = 0; int count = -1; - while(in.good()) - { + while(in.good()) { if(count > numBins) count = 0; - if(count == 0) - { + if(count == 0) { cout << label << "\n"; in >> numBins; } diff --git a/getlinecommand.cpp b/getlinecommand.cpp index 09b1654..df14f51 100644 --- a/getlinecommand.cpp +++ b/getlinecommand.cpp @@ -41,12 +41,10 @@ int GetlineCommand::execute(){ int numBins = 0; int count = -1; int line = 1; - while(in.good()) - { + while(in.good()) { if(count > numBins) count = 0; - if(count == 0) - { + if(count == 0) { cout << line << "\n"; in >> numBins; line++; diff --git a/globaldata.cpp b/globaldata.cpp index ce10ee1..6849b88 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -54,7 +54,9 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "list" ) { listfile = value; inputFileName = value; fileroot = value; format = "list"; } if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } - if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "nexus" ) { nexusfile = value; inputFileName = value; fileroot = value; format = "nexus"; } + if (key == "clustal" ) { clustalfile = value; inputFileName = value; fileroot = value; format = "clustal"; } if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } if (key == "name" ) { namefile = value; } @@ -73,10 +75,16 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "step") { step = value; } if (key == "form") { form = value; } if (key == "sorted") { sorted = value; } + if (key == "vertical") { vertical = value; } + if (key == "trump") { trump = value; } + if (key == "filter") { filter = value; } + if (key == "soft") { soft = value; } if (key == "scale") { scale = value; } + + if (key == "line") {//stores lines to be used in a set lines.clear(); labels.clear(); @@ -111,6 +119,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "rabund" ) { rabundfile = value; inputFileName = value; fileroot = value; format = "rabund"; } if (key == "sabund" ) { sabundfile = value; inputFileName = value; fileroot = value; format = "sabund"; } if (key == "fasta" ) { fastafile = value; inputFileName = value; fileroot = value; format = "fasta"; } + if (key == "nexus" ) { nexusfile = value; inputFileName = value; fileroot = value; format = "nexus"; } + if (key == "clustal" ) { clustalfile = value; inputFileName = value; fileroot = value; format = "clustal"; } if (key == "tree" ) { treefile = value; inputFileName = value; fileroot = value; format = "tree"; } if (key == "shared" ) { sharedfile = value; inputFileName = value; fileroot = value; format = "sharedfile"; } if (key == "name" ) { namefile = value; } @@ -129,8 +139,15 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "step") { step = value; } if (key == "form") { form = value; } if (key == "sorted") { sorted = value; } + if (key == "vertical") { vertical = value; } + if (key == "trump") { trump = value; } + if (key == "filter") { filter = value; } + if (key == "soft") { soft = value; } if (key == "scale") { scale = value; } + + + if (key == "line") {//stores lines to be used in a vector lines.clear(); @@ -163,6 +180,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ //input defaults for calculators if (commandName == "collect.single") { + if ((calc == "default") || (calc == "")) { calc = "sobs-chao-ace-jack-shannon-npshannon-simpson"; } Estimators.clear(); splitAtDash(calc, Estimators); @@ -173,6 +191,7 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ splitAtDash(calc, Estimators); } if (commandName == "collect.shared") { + if ((calc == "default") || (calc == "")) { calc = "sharedsobs-sharedchao-sharedace-jabund-sorabund-jclass-sorclass-jest-sorest-thetayc-thetan"; } Estimators.clear(); splitAtDash(calc, Estimators); @@ -239,6 +258,8 @@ string GlobalData::getOrderFile() { return orderfile; } string GlobalData::getTreeFile() { return treefile; } string GlobalData::getSharedFile() { return sharedfile; } string GlobalData::getFastaFile() { return fastafile; } +string GlobalData::getNexusFile() { return nexusfile; } +string GlobalData::getClustalFile() { return clustalfile; } string GlobalData::getCutOff() { return cutoff; } string GlobalData::getFormat() { return format; } string GlobalData::getPrecision() { return precision; } @@ -253,7 +274,11 @@ string GlobalData::getGroups() { return groups; } string GlobalData::getStep() { return step; } string GlobalData::getForm() { return form; } string GlobalData::getSorted() { return sorted; } +string GlobalData::getTrump() { return trump; } +string GlobalData::getSoft() { return soft; } +string GlobalData::getFilter() { return filter; } string GlobalData::getScale() { return scale; } + void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} @@ -289,6 +314,8 @@ void GlobalData::clear() { groupfile = ""; orderfile = ""; fastafile = ""; + nexusfile = ""; + clustalfile = ""; treefile = ""; sharedfile = ""; cutoff = "10.00"; @@ -307,7 +334,12 @@ void GlobalData::clear() { step = "0.01"; form = "integral"; sorted = "T"; //F means don't sort, T means sort. - scale = "log10"; + vertical = ""; + trump = ""; + filter = ""; + soft = ""; + scale = "log10"; + } //*******************************************************/ diff --git a/globaldata.hpp b/globaldata.hpp index cc13bae..40b8758 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -39,7 +39,7 @@ public: GroupMap* gGroupmap; FullMatrix* gMatrix; TreeMap* gTreemap; - string inputFileName, helpRequest, commandName; + string inputFileName, helpRequest, commandName, vertical; bool allLines; vector Estimators, Groups; //holds estimators to be used set lines; //hold lines to be used @@ -55,6 +55,8 @@ public: string getGroupFile(); string getOrderFile(); string getFastaFile(); + string getNexusFile(); + string getClustalFile(); string getTreeFile(); string getSharedFile(); string getCutOff(); @@ -71,8 +73,15 @@ public: string getStep(); string getForm(); string getSorted(); + + string getTrump(); + string getSoft(); + string getFilter(); + + string getScale(); + void setListFile(string); void setPhylipFile(string); void setColumnFile(string); @@ -97,8 +106,10 @@ public: private: - string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups; - string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, scale; + + string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, nexusfile, clustalfile, treefile, sharedfile, line, label, randomtree, groups; + string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form, sorted, trump, soft, filter, scale; + static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor diff --git a/goodscoverage.cpp b/goodscoverage.cpp new file mode 100644 index 0000000..07b78b3 --- /dev/null +++ b/goodscoverage.cpp @@ -0,0 +1,39 @@ +/* + * goodscoverage.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "goodscoverage.h" +#include "calculator.h" + +/**********************************************************/ + +EstOutput GoodsCoverage::getValues(SAbundVector* rank){ + try { + data.resize(1,0); + + double numSingletons = rank->get(1); + double totalIndividuals = rank->getNumSeqs(); + + data[0] = 1 - numSingletons/totalIndividuals; + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the GoodsCoverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the GoodsCoverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + diff --git a/goodscoverage.h b/goodscoverage.h new file mode 100644 index 0000000..b4065c0 --- /dev/null +++ b/goodscoverage.h @@ -0,0 +1,31 @@ +#ifndef GOODSCOVERAGE_H +#define GOODSCOVERAGE_H + +/* + * goodscoverage.h + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +#include "calculator.h" + +/* This class implements the LogSD estimator on single group. +It is a child of the calculator class. */ + +/***********************************************************************/ + +class GoodsCoverage : public Calculator { + +public: + GoodsCoverage() : Calculator("goodscoverage", 1, false) {}; + EstOutput getValues(SAbundVector*); + EstOutput getValues(vector) {return data;}; + +private: +}; + +/***********************************************************************/ + +#endif diff --git a/logsd.cpp b/logsd.cpp index e959ab8..782af2a 100644 --- a/logsd.cpp +++ b/logsd.cpp @@ -35,8 +35,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ double x = .5; double step = .4999999999; - while(fabs(snRatio - logS(x)) > .00001) //This uses a binary search to find the value of x. - { + while(fabs(snRatio - logS(x)) > .00001) { //This uses a binary search to find the value of x. if(logS(x) > snRatio) x += step; else @@ -51,15 +50,12 @@ EstOutput LogSD::getValues(SAbundVector* rank){ double octSumExp = 0; double sumExp = 0; double maxDiff = 0; - for(int y = 1; y < rank->size(); y++) - { - if(y - .5 < pow(2.0, oct)) - { + for(int y = 1; y < rank->size(); y++) { + if(y - .5 < pow(2.0, oct)) { octSumObs += rank->get(y); octSumExp += alpha*pow(x,y)/(y); } - else - { + else { sumObs += octSumObs; octSumObs = rank->get(y); @@ -68,8 +64,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ oct++; } - if(y == rank->size()-1) - { + if(y == rank->size()-1) { sumObs += octSumObs; sumExp += octSumExp; } @@ -85,6 +80,7 @@ EstOutput LogSD::getValues(SAbundVector* rank){ cout << ".05 confidence value = " << .89196/sqrt(numSpec) << "\n"; cout << "If D Test Statistic is greater than the critical value then the data fits the Log Series Distribution model w/ 95% confidence.\n\n";*/ + data[0] = (maxDiff + .5)/numSpec; data[1] = 0.886/sqrt(numSpec); data[2] = 1.031/sqrt(numSpec); @@ -96,11 +92,11 @@ EstOutput LogSD::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the LogSD class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the LogSD class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/qstat.cpp b/qstat.cpp index a2f821e..ed4471a 100644 --- a/qstat.cpp +++ b/qstat.cpp @@ -33,20 +33,17 @@ EstOutput QStat::getValues(SAbundVector* rank){ int r3Ind = 0; int sumSpec = 0; int iqSum = 0; - for(int i = 1; i < rank->size(); i++) - { + for(int i = 1; i < rank->size(); i++) { if(r1 != -1 && r3 != -1) i = rank->size(); sumSpec += rank->get(i); - if(r1 == -1 && sumSpec >= numSpec*.25) - { + if(r1 == -1 && sumSpec >= numSpec*.25) { r1 = rank->get(i); r1Ind = i; } - else if(r3 == -1 && sumSpec >= numSpec*.75) - { + else if(r3 == -1 && sumSpec >= numSpec*.75) { r3 = rank->get(i); r3Ind = i; } @@ -63,11 +60,11 @@ EstOutput QStat::getValues(SAbundVector* rank){ return data; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the NPShannon class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the QStat class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the NPShannon class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the QStat class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } } diff --git a/readclustal.cpp b/readclustal.cpp new file mode 100644 index 0000000..2211ccd --- /dev/null +++ b/readclustal.cpp @@ -0,0 +1,80 @@ +/* + * readclustal.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readclustal.h" +#include +#include + +/*******************************************************************************/ +ReadClustal::ReadClustal(string file) { + try { + openInputFile(file, filehandle); + clustalFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadClustal::~ReadClustal(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadClustal::read() { + string temp; + string name; + string sequence; + string firstName = ""; + for(int i = 0; i < 6; i++) + filehandle >> temp; + + int count = 0; + int numSeqs = 0; + bool firstDone = false; + + while(!filehandle.eof()) { + filehandle >> name; + if(numSeqs != 0) { + if(count == numSeqs) + count = 0; + } + else if(!firstDone && firstName.compare("") == 0) + firstName = name; + else if(!firstDone && firstName.compare(name) == 0) { + numSeqs = count; + firstDone = true; + count = 0; + } + + if(name.find_first_of("*") == -1) { + filehandle >> sequence; + if(!firstDone) { + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + } + else + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + + count++; + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadClustal::getDB() { + return &sequencedb; +} diff --git a/readclustal.h b/readclustal.h new file mode 100644 index 0000000..8556486 --- /dev/null +++ b/readclustal.h @@ -0,0 +1,38 @@ +#ifndef READCLUSTAL_H +#define READCLUSTAL_H + +/* + * readclustal.h + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadClustal { + + public: + ReadClustal(string); + ~ReadClustal(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string clustalFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readdistcommand.cpp b/readdistcommand.cpp index f19bf11..0e45ad5 100644 --- a/readdistcommand.cpp +++ b/readdistcommand.cpp @@ -10,6 +10,7 @@ #include "readdistcommand.h" #include "readphylip.h" #include "readcolumn.h" +#include "readmatrix.hpp" ReadDistCommand::ReadDistCommand(){ try { diff --git a/readfasta.cpp b/readfasta.cpp new file mode 100644 index 0000000..ff39e01 --- /dev/null +++ b/readfasta.cpp @@ -0,0 +1,68 @@ +/* + * readfasta.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/21/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readfasta.h" +#include +#include + +/*******************************************************************************/ +ReadFasta::ReadFasta(string file) { + try { + openInputFile(file, filehandle); + fastaFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadFasta::~ReadFasta(){ + //for(int i = 0; i < sequencedb.getNumSeqs(); i++) + //delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadFasta::read() { + string name = ""; + string sequence = ""; + string temp; + int count = 0; + while(!filehandle.eof()){ + if(count == 0) + filehandle >> temp; + if(temp.substr(0,1).compare(">") == 0) { + if(count != 0) { + Sequence newSequence(name, sequence); + sequencedb.add(newSequence); + sequence = ""; + } + else + count++; + name = temp.substr(1,temp.length()-1); + } + else + sequence += temp; + + filehandle >> temp; + } + Sequence newSequence(name, sequence); + sequencedb.add(newSequence); + + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadFasta::getDB() { + return &sequencedb; +} diff --git a/readfasta.h b/readfasta.h new file mode 100644 index 0000000..a4fe436 --- /dev/null +++ b/readfasta.h @@ -0,0 +1,39 @@ +#ifndef READFASTA_H +#define READFASTA_H + +/* + * readfasta.h + * Mothur + * + * Created by Thomas Ryabin on 4/21/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadFasta { + + public: + ReadFasta(string); + ~ReadFasta(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string fastaFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readnexus.cpp b/readnexus.cpp new file mode 100644 index 0000000..bc91e16 --- /dev/null +++ b/readnexus.cpp @@ -0,0 +1,75 @@ +/* + * readnexusaln.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readnexus.h" +#include +#include + +/*******************************************************************************/ +ReadNexus::ReadNexus(string file) { + try { + openInputFile(file, filehandle); + nexusFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadNexus::~ReadNexus(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadNexus::read() { + string temp; + string name; + string sequence; + for(int i = 0; i < 5; i++) + filehandle >> temp; + + int numSeqs = atoi(temp.substr(temp.find_first_of("=")+1, temp.length() - temp.find_first_of("=") - 1).c_str()); + + for(int i = 0; i < 9; i++) + filehandle >> temp; + + int count = 0; + bool firstDone = false; + while(!filehandle.eof()){ + filehandle >> name; + filehandle >> sequence; + if(name.compare(";") != 0) { + if(!firstDone) { + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + } + else + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + + count++; + if(count == numSeqs) { + if(!firstDone) + firstDone = true; + count = 0; + } + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadNexus::getDB() { + return &sequencedb; +} diff --git a/readnexus.h b/readnexus.h new file mode 100644 index 0000000..6cd4973 --- /dev/null +++ b/readnexus.h @@ -0,0 +1,38 @@ +#ifndef READNEXUS_H +#define READNEXUS_H + +/* + * readnuxus.h + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadNexus { + + public: + ReadNexus(string); + ~ReadNexus(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string nexusFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readnexusal.h b/readnexusal.h new file mode 100644 index 0000000..455044a --- /dev/null +++ b/readnexusal.h @@ -0,0 +1,38 @@ +#ifndef READNEXUSALN_H +#define READNEXUSALN_H + +/* + * readnexusaln.h + * Mothur + * + * Created by Thomas Ryabin on 4/22/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "utilities.hpp" + +/**********************************************************************************/ + +class ReadNexus { + + public: + ReadNexus(string); + ~ReadNexus(); + void read(); + + private: + GlobalData* globaldata; + string nexusFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + +}; + +#endif \ No newline at end of file diff --git a/readphylip.cpp b/readphylip.cpp index 174b7e3..bc39b52 100644 --- a/readphylip.cpp +++ b/readphylip.cpp @@ -13,174 +13,173 @@ /***********************************************************************/ ReadPhylipMatrix::ReadPhylipMatrix(string distFile){ - - successOpen = openInputFile(distFile, fileHandle); - + + successOpen = openInputFile(distFile, fileHandle); + } /***********************************************************************/ void ReadPhylipMatrix::read(NameAssignment* nameMap){ - try { - - float distance; - int square, nseqs; - string name; - vector matrixNames; - - fileHandle >> nseqs >> name; + try { + + float distance; + int square, nseqs; + string name; + vector matrixNames; + + fileHandle >> nseqs >> name; - matrixNames.push_back(name); + matrixNames.push_back(name); - if(nameMap == NULL){ - list = new ListVector(nseqs); - list->set(0, name); - } - else{ - list = new ListVector(nameMap->getListVector()); - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - } - - char d; - while((d=fileHandle.get()) != EOF){ - - if(isalnum(d)){ - square = 1; - fileHandle.putback(d); - for(int i=0;i> distance; - } - break; - } - if(d == '\n'){ - square = 0; - break; - } - } - - Progress* reading; - - if(square == 0){ + if(nameMap == NULL){ + list = new ListVector(nseqs); + list->set(0, name); + } + else{ + list = new ListVector(nameMap->getListVector()); + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + } + + char d; + while((d=fileHandle.get()) != EOF){ + + if(isalnum(d)){ + square = 1; + fileHandle.putback(d); + for(int i=0;i> distance; + } + break; + } + if(d == '\n'){ + square = 0; + break; + } + } + + Progress* reading; + + if(square == 0){ - reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); - - int index = 0; - - for(int i=1;i> name; - matrixNames.push_back(name); - - //there's A LOT of repeated code throughout this method... - if(nameMap == NULL){ - list->set(i, name); - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff){ - PCell value(i, j, distance); - D->addCell(value); - } - index++; - reading->update(index); - } - - } - else{ - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff){ - PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); - D->addCell(value); - } - index++; - reading->update(index); - } - } - } - } - else{ + reading = new Progress("Reading matrix: ", nseqs * (nseqs - 1) / 2); + + int index = 0; + + for(int i=1;i> name; + matrixNames.push_back(name); + + //there's A LOT of repeated code throughout this method... + if(nameMap == NULL){ + list->set(i, name); + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + PCell value(i, j, distance); + D->addCell(value); + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff){ + PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); + D->addCell(value); + } + index++; + reading->update(index); + } + } + } + } + else{ - reading = new Progress("Reading matrix: ", nseqs * nseqs); - - int index = nseqs; - - for(int i=1;i> name; - matrixNames.push_back(name); - - if(nameMap == NULL){ - list->set(i, name); - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && j < i){ - PCell value(i, j, distance); - D->addCell(value); - } - index++; - reading->update(index); - } - - } - else{ - if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } - - for(int j=0;j> distance; - - if (distance == -1) { distance = 1000000; } - - if(distance < cutoff && j < i){ - PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); - D->addCell(value); - } - index++; - reading->update(index); - } - } - } - } - reading->finish(); - delete reading; + reading = new Progress("Reading matrix: ", nseqs * nseqs); + + int index = nseqs; + + for(int i=1;i> name; + matrixNames.push_back(name); + + if(nameMap == NULL){ + list->set(i, name); + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + PCell value(i, j, distance); + D->addCell(value); + } + index++; + reading->update(index); + } + + } + else{ + if(nameMap->count(name)==0){ cout << "Error: Sequence '" << name << "' was not found in the names file, please correct" << endl; } + + for(int j=0;j> distance; + + if (distance == -1) { distance = 1000000; } + + if(distance < cutoff && j < i){ + PCell value(nameMap->get(matrixNames[i]), nameMap->get(matrixNames[j]), distance); + D->addCell(value); + } + index++; + reading->update(index); + } + } + } + } + reading->finish(); + delete reading; - list->setLabel("0"); - fileHandle.close(); + list->setLabel("0"); + fileHandle.close(); - if(nameMap != NULL){ - for(int i=0;ierase(matrixNames[i]); - } - if(nameMap->size() > 0){ - //should probably tell them what is missing if we missed something - cout << "missed something" << '\t' << nameMap->size() << endl; - } - } + if(nameMap != NULL){ + for(int i=0;ierase(matrixNames[i]); + } + if(nameMap->size() > 0){ + //should probably tell them what is missing if we missed something + cout << "missed something" << '\t' << nameMap->size() << endl; + } + } - } - catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } - catch(...) { - cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; - exit(1); - } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadPhylipMatrix class Function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadPhylipMatrix class function read. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } } /***********************************************************************/ ReadPhylipMatrix::~ReadPhylipMatrix(){ - delete D; - delete list; + delete D; + delete list; } - diff --git a/readseqscommand.cpp b/readseqscommand.cpp new file mode 100644 index 0000000..919855b --- /dev/null +++ b/readseqscommand.cpp @@ -0,0 +1,80 @@ +/* + * readseqscommand.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readseqscommand.h" + +//********************************************************************************************************************** +ReadSeqsCommand::ReadSeqsCommand(){ + try { + globaldata = GlobalData::getInstance(); + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadOtuCommand class function ReadOtuCommand. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +//********************************************************************************************************************** + +ReadSeqsCommand::~ReadSeqsCommand(){ + //delete readFasta->getDB(); +// delete readNexus->getDB(); +// delete readClustal->getDB(); +// delete readPhylip->getDB(); +} + +//********************************************************************************************************************** + +int ReadSeqsCommand::execute(){ + try { + filebuf fb; + + //fb.open ("fasta.txt",ios::out); +// readFasta->read(); +// SequenceDB* db = readFasta->getDB(); + + //fb.open("nexus.txt",ios::out); +// readNexus->read(); +// SequenceDB* db = readNexus->getDB(); + + //fb.open("clustal.txt",ios::out); +// readClustal->read(); +// SequenceDB* db = readClustal->getDB(); + + //fb.open("phylip.txt",ios::out); +// readPhylip->read(); +// SequenceDB* db = readPhylip->getDB(); + + + + //for(int i = 0; i < db->size(); i++) { +// cout << db->get(i).getLength() << "\n" << db->get(i).getName() << ": " << db->get(i).getUnaligned() << "\n\n"; +// } + + //ostream os(&fb); +// db->print(os); +// fb.close(); + + return 0; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadOtuCommand class Function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadOtuCommand class function execute. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** diff --git a/readseqscommand.h b/readseqscommand.h new file mode 100644 index 0000000..660eb3f --- /dev/null +++ b/readseqscommand.h @@ -0,0 +1,55 @@ +#ifndef READSEQSCOMMAND_H +#define READSEQSCOMMAND_H +/* + * readseqscommand.h + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "command.hpp" +#include "readfasta.h" +#include "readnexus.h" +#include "readclustal.h" +#include "readseqsphylip.h" +#include "inputdata.h" +#include "groupmap.h" +#include "sharedcommand.h" +#include "parselistcommand.h" + +/* The read.otu must be run before you execute a collect.single, rarefaction.single, summary.single, +collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund +upon completion of the cluster command or you may use your own. The read.otu command parameter options are +listfile, rabundfile, sabundfile, groupfile and orderfile. The reaad.otu command can be used in two ways. +The first is to read a listfile, rabundfile or sabundfile and run the collect.single, rarefaction.single or summary.single. +For this use the read.otu command should be in the following format: read.otu(listfile=yourListFile, orderfile=yourOrderFile). +The listfile, rabundfile or sabundfile parameter is required, but you may only use one of them. +The second way to use the read.otu command is to read a listfile and a groupfile so you can use the collect.shared, +rarefaction.shared or summary.shared commands. In this case the read.otu command should be in the following format: +read.otu(listfile=yourListFile, groupfile=yourGroupFile). The listfile parameter and groupfile paramaters are required. +When using the command the second way read.otu command parses the .list file and separates it into groups. +It outputs a .shared file containing the OTU information for each group. The read.otu command also outputs a .list file for each group. */ + +class GlobalData; + +class ReadSeqsCommand : public Command { +public: + ReadSeqsCommand(); + ~ReadSeqsCommand(); + int execute(); + +private: + GlobalData* globaldata; + ReadFasta* readFasta; + ReadNexus* readNexus; + ReadClustal* readClustal; + ReadPhylip* readPhylip; + InputData* input; + Command* shared; + Command* parselist; + string filename; +}; + +#endif diff --git a/readseqsphylip.cpp b/readseqsphylip.cpp new file mode 100644 index 0000000..b6be40e --- /dev/null +++ b/readseqsphylip.cpp @@ -0,0 +1,128 @@ +/* + * readphylip.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "readseqsphylip.h" +#include +#include + +/*******************************************************************************/ +bool ReadPhylip::isSeq(string seq) { + string validChars[] = {"A","G","C","T","U","N","-"}; + + for(int i = 0; i < seq.length(); i++) { + bool valid = false; + string c = seq.substr(i,1); + for(int k = 0; k < 7; k++) + if(c.compare(validChars[k]) == 0) { + valid = true; + k = 7; + } + if(!valid) + return false; + } + + return true; +} + +/*******************************************************************************/ +ReadPhylip::ReadPhylip(string file) { + try { + openInputFile(file, filehandle); + phylipFile = file; + globaldata = GlobalData::getInstance(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the ReadTree class Function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the ReadTree class function ReadTree. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/*******************************************************************************/ +ReadPhylip::~ReadPhylip(){ +// for(int i = 0; i < sequencedb.getNumSeqs(); i++) +// delete sequencedb.get(i); +} +/*******************************************************************************/ +void ReadPhylip::read() { + string temp; + string name; + string sequence; + + int count = 0; + int letterCount = 0; + int numCols = 0; + filehandle >> temp; + int numSeqs = atoi(temp.c_str()); + filehandle >> temp; + int numLetters = atoi(temp.c_str()); + + bool firstDone = false; + bool last = false; + filehandle >> name; + + while(!filehandle.eof()) { + if(!firstDone) { + sequence = ""; + if(count == 0) { + filehandle >> temp; + while(isSeq(temp)) { + sequence += temp; + numCols++; + filehandle >> temp; + } + letterCount += sequence.length(); + } + else { + for(int i = 0; i < numCols; i++) { + filehandle >> temp; + sequence += temp; + } + if(count < numSeqs-1) + filehandle >> temp; + } + Sequence newSeq(name, sequence); + sequencedb.add(newSeq); + if(count < numSeqs-1) + name = temp; + } + else { + sequence = ""; + for(int i = 0; i < numCols; i++) { + filehandle >> temp; + sequence += temp; + if(count == 0) + letterCount += temp.length(); + if(letterCount == numLetters && count == 0) { + numCols = i + 1; + i = numCols; + } + } + if(!(last && count == 0)) + sequencedb.set(count, sequencedb.get(count).getAligned() + sequence); + if(letterCount == numLetters && count == 0) + last = true; + } + + count++; + + if(count == numSeqs) { + firstDone = true; + count = 0; + } + } + filehandle.close(); +} + +/*********************************************************************************/ +SequenceDB* ReadPhylip::getDB() { + return &sequencedb; +} diff --git a/readseqsphylip.h b/readseqsphylip.h new file mode 100644 index 0000000..c08c977 --- /dev/null +++ b/readseqsphylip.h @@ -0,0 +1,38 @@ +#ifndef READPHYLIP_H +#define READPHYLIP_H + +/* + * readphylip.h + * Mothur + * + * Created by Thomas Ryabin on 4/24/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ +using namespace std; + +#include "globaldata.hpp" +#include "sequencedb.h" +#include "mothur.h" + +/**********************************************************************************/ + +class ReadPhylip { + + public: + ReadPhylip(string); + ~ReadPhylip(); + void read(); + SequenceDB* getDB(); + + private: + GlobalData* globaldata; + string phylipFile; + ifstream filehandle; + SequenceDB sequencedb; + int readOk; // readOk = 0 means success, readOk = 1 means error(s). + + bool isSeq(string); +}; + +#endif \ No newline at end of file diff --git a/sequence.cpp b/sequence.cpp index 7bc8b05..b59363e 100644 --- a/sequence.cpp +++ b/sequence.cpp @@ -11,50 +11,29 @@ using namespace std; #include "sequence.hpp" -//******************************************************************************************************************** - -Sequence::Sequence(){} +/***********************************************************************/ -//******************************************************************************************************************** - -Sequence::Sequence(ifstream& fastaFile){ - - string accession; - fastaFile >> accession; - setName(accession); +Sequence::Sequence() {} - char letter; - string sequence; - - while(fastaFile && letter != '>'){ - - letter = fastaFile.get(); - - if(isalpha(letter)){ - - sequence += letter; - - } - - } - fastaFile.putback(letter); +/***********************************************************************/ - if(sequence.find_first_of('-') != string::npos){ +Sequence::Sequence(string newName, string sequence) { + name = newName; + if(sequence.find_first_of('-') != string::npos) { setAligned(sequence); } setUnaligned(sequence); } - //******************************************************************************************************************** -string Sequence::convert2ints(){ +string Sequence::convert2ints() { if(unaligned == "") { /* need to throw an error */ } string processed; - for(int i=0;i') { name = seqName.substr(1); } else { name = seqName; } } @@ -76,14 +55,14 @@ void Sequence::setName(string seqName){ void Sequence::setUnaligned(string sequence){ - if(sequence.find_first_of('-') != string::npos){ + if(sequence.find_first_of('-') != string::npos) { string temp = ""; - for(int j=0;j aligned.length()) + return unaligned.length(); + return aligned.length(); +} + +//******************************************************************************************************************** + +void Sequence::printSequence(ostream& out){ + string toPrint = unaligned; + if(aligned.length() > unaligned.length()) + toPrint = aligned; + out << ">" << name << "\n" << toPrint << "\n"; +} + +//******************************************************************************************************************** diff --git a/sequence.hpp b/sequence.hpp index e094fb5..03cbab7 100644 --- a/sequence.hpp +++ b/sequence.hpp @@ -19,21 +19,29 @@ using namespace std; class Sequence { public: Sequence(); - Sequence(ifstream&); + Sequence(string, string); + void setName(string); void setUnaligned(string); void setPairwise(string); void setAligned(string); + void setLength(); + string convert2ints(); - string getSeqName(); + string getName(); string getAligned(); string getPairwise(); string getUnaligned(); + int getLength(); + void printSequence(ostream&); + private: string name; string unaligned; - string pairwise; string aligned; + string pairwise; + int length; + int lengthAligned; }; /**************************************************************************************************/ diff --git a/sequencedb.cpp b/sequencedb.cpp new file mode 100644 index 0000000..e8aade7 --- /dev/null +++ b/sequencedb.cpp @@ -0,0 +1,94 @@ +/* + * sequencedb.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sequencedb.h" +#include "sequence.hpp" +#include "mothur.h" +#include "calculator.h" + + +/***********************************************************************/ + +SequenceDB::SequenceDB() {} + +/***********************************************************************/ + +SequenceDB::SequenceDB(int newSize) { + data.resize(newSize); +} + +/***********************************************************************/ + +SequenceDB::SequenceDB(ifstream&) {} + +/***********************************************************************/ + +int SequenceDB::getNumSeqs() { + return data.size(); +} + +/***********************************************************************/ + +void SequenceDB::set(int index, string newUnaligned) { + Sequence newSeq(data[index].getName(), newUnaligned); + data[index] = newSeq; +} + +/***********************************************************************/ + +void SequenceDB::set(int index, Sequence newSeq) { + data[index] = newSeq; +} + +/***********************************************************************/ + +Sequence SequenceDB::get(int index) { + return data[index]; +} + +/***********************************************************************/ + +void SequenceDB::changeSize(int newSize) { + data.resize(newSize); +} + +/***********************************************************************/ + +void SequenceDB::clear() { + data.clear(); +} + +/***********************************************************************/ + +int SequenceDB::size() { + return data.size(); +} + +/***********************************************************************/ + +void SequenceDB::print(ostream& out) { + for(int i = 0; i < data.size(); i++) + data[i].printSequence(out); +} + +/***********************************************************************/ + +void SequenceDB::add(Sequence newSequence) { + try { + data.push_back(newSequence); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the RAbundVector class Function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the RAbundVector class function push_back. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} \ No newline at end of file diff --git a/sequencedb.h b/sequencedb.h new file mode 100644 index 0000000..f31fc32 --- /dev/null +++ b/sequencedb.h @@ -0,0 +1,46 @@ +#ifndef SEQUENCEDB_H +#define SEQUENCEDB_H + +/* + * sequencedb.h + * Mothur + * + * Created by Thomas Ryabin on 4/13/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + + +using namespace std; + +#include "sequence.hpp" +#include "calculator.h" + + + + +class SequenceDB { + +public: + SequenceDB(); + SequenceDB(int); //makes data that size + SequenceDB(ifstream&); //reads file to fill data +// ~SequenceDB(); //loops through data and delete each sequence + + int getNumSeqs(); + + void set(int, string); //unaligned - should also set length + void set(int, Sequence); //unaligned - should also set length + Sequence get(int); //returns sequence name at that location + void add(Sequence); //adds unaligned sequence + void changeSize(int); //resizes data + void clear(); //clears data - remeber to loop through and delete the sequences inside or you will have a memory leak + int size(); //returns datas size + void print(ostream&); //loops through data using sequence class print + +private: + vector data; + +}; + +#endif diff --git a/sharedjackknife.cpp b/sharedjackknife.cpp new file mode 100644 index 0000000..c267a07 --- /dev/null +++ b/sharedjackknife.cpp @@ -0,0 +1,155 @@ +/* + * sharedjackknife.cpp + * Mothur + * + * Created by Thomas Ryabin on 3/30/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sharedjackknife.h" + +/*************************************************************************************** + +***************************************************************************************/ +double SharedJackknife::simpson(int abunds[], double numInd, int numBins){ + double denom = numInd*(numInd-1); + double sum = 0; + for(int i = 0; i < numBins; i++) + sum += (double)abunds[i]*((double)abunds[i]-1)/denom; + + return sum; +} + +/*****************************************************************************************/ + +double* SharedJackknife::jackknife(){ + int numBins = groups.at(0)->getNumBins()-1; + int cArray[numBins]; + for(int i = 0; i < numBins; i++) + cArray[i] = 0; + + double numInd = 0; + for(int i = 0; i < numGroups; i++) + for(int j = 0; j < numBins; j++) { + int curAbund = groups.at(i)->get(j+1).abundance; + cArray[j] += curAbund; + numInd += (double)curAbund; + } + + double baseD = 1/simpson(cArray, numInd, numBins); + + double pseudoVals[numBins]; + double jackknifeEstimate = 0; + for(int i = 0; i < numGroups; i++) { + for(int j = 0; j < numBins-1; j++) { + int abundDiff = -groups.at(i)->get(j+1).abundance; + if(i > 0) + abundDiff += groups.at(i-1)->get(j+1).abundance; + + cArray[j] += abundDiff; + numInd += abundDiff; + } + + double curD = 1/simpson(cArray, numInd, numBins); + pseudoVals[i] = (double)numGroups*(baseD - curD) + curD; + jackknifeEstimate += pseudoVals[i]; + } + jackknifeEstimate /= (double)numGroups; + + double variance = 0; + for(int i = 0; i < numGroups; i++) + variance += pow(pseudoVals[i]-jackknifeEstimate, 2); + + variance /= (double)numGroups*((double)numGroups-1); + double stErr = sqrt(variance); + TDTable table; + double confLimit = 0; + if(numGroups <= 30) + confLimit = table.getConfLimit(numGroups-1, 1); + else + confLimit = 1.645; + + confLimit *= stErr; + + double* rdata = new double[3]; + rdata[0] = baseD; + rdata[1] = jackknifeEstimate - confLimit; + rdata[2] = jackknifeEstimate + confLimit; + + return rdata; +} + +/************************************************************************************************ +************************************************************************************************/ + +EstOutput SharedJackknife::getValues(vector vectorShared){ //Fix this for collect, mistake was that it was made with summary in mind. + try { + SharedRAbundVector* shared1 = vectorShared[0]; + SharedRAbundVector* shared2 = vectorShared[1]; + if(numGroups == -1) { + globaldata = GlobalData::getInstance(); + numGroups = globaldata->Groups.size(); + } + + if(callCount == numGroups*(numGroups-1)/2) { + currentCallDone = true; + callCount = 0; + } + callCount++; + + if(currentCallDone) { + groups.clear(); + currentCallDone = false; + } + + if(groups.size() != numGroups) { + if(groups.size() == 0) + groups.push_back(shared1); + groups.push_back(shared2); + } + + if(groups.size() == numGroups && callCount < numGroups) { + data.resize(3,0); + + double* rdata = jackknife(); + data[0] = rdata[0]; + data[1] = rdata[1]; + data[2] = rdata[2]; + + //cout << "sT = " << data[0] << " lower confLimit = " << data[1] << " upper confLimit = " << data[2] << "\n"; + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[0])) { data[2] = 0; } + + /*for(int i = 0; i < groups.size(); i++) + cout << groups.at(i)->getGroup() << " "; + cout << "\n"; + cout << groups.size() << " " << data[0] << " " << data[1] << " " << data[2] << "\n\n";*/ + + return data; + } + + data.resize(3,0); + data[0] = 0; + data[1] = 0; + data[2] = 0; + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } + if (isnan(data[2]) || isinf(data[2])) { data[2] = 0; } + return data; + } + + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedJackknife class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedJackknife class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ + diff --git a/sharedjackknife.h b/sharedjackknife.h new file mode 100644 index 0000000..00c0de0 --- /dev/null +++ b/sharedjackknife.h @@ -0,0 +1,40 @@ +#ifndef SHAREDJACKKNIFE_H +#define SHAREDJACKKNIFE_H + +/* + * sharedjackknife.h + * Mothur + * + * Created by Thomas Ryabin on 3/30/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" +#include "globaldata.hpp" + +/*This class implements the SharedJackknife estimator. +It is a child of the calculator class.*/ + +/***********************************************************************/ + +class SharedJackknife : public Calculator { + +public: + SharedJackknife() : numGroups(-1), callCount(0), count(0), currentCallDone(true), Calculator("sharedjackknife", 3, false) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); + +private: + GlobalData* globaldata; + int numGroups, callCount, count; + bool currentCallDone; + vector groups; + double simpson(int[], double, int); + double* jackknife(); +}; + +/***********************************************************************/ + +#endif + diff --git a/sharedkstest.cpp b/sharedkstest.cpp index fefe14f..f65f271 100644 --- a/sharedkstest.cpp +++ b/sharedkstest.cpp @@ -13,8 +13,8 @@ EstOutput KSTest::getValues(vector shared){ try { - data.resize(2,0); - + data.resize(3,0); + //Must return shared1 and shared2 to original order at conclusion of kstest vector initData1 = shared[0]->getData(); vector initData2 = shared[1]->getData(); @@ -59,6 +59,8 @@ EstOutput KSTest::getValues(vector shared){ data[0] = DStatistic; data[1] = critVal; + data[2] = 0; + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } if (isnan(data[1]) || isinf(data[1])) { data[1] = 0; } diff --git a/sharedmarczewski.cpp b/sharedmarczewski.cpp new file mode 100644 index 0000000..be1a2fb --- /dev/null +++ b/sharedmarczewski.cpp @@ -0,0 +1,50 @@ +/* + * sharedmarczewski.cpp + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "sharedmarczewski.h" + +EstOutput SharedMarczewski::getValues(vector vectorShared){ + try { + SharedRAbundVector* shared1 = vectorShared[0]; + SharedRAbundVector* shared2 = vectorShared[1]; + + data.resize(1,0); + + double a = 0; + double b = 0; + double c = 0; + for(int i = 1; i < shared1->size(); i++) + { + int abund1 = shared1->get(i).abundance; + int abund2 = shared2->get(i).abundance; + + if(abund1 > 0 && abund2 > 0) + a++; + else if(abund1 > 0 && abund2 == 0) + b++; + else if(abund1 == 0 && abund2 > 0) + c++; + } + data[0] = (b+c)/(a+b+c); + + if (isnan(data[0]) || isinf(data[0])) { data[0] = 0; } + + return data; + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the SharedMarczewski class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the SharedMarczewski class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + +/***********************************************************************/ diff --git a/sharedmarczewski.h b/sharedmarczewski.h new file mode 100644 index 0000000..55bc7e4 --- /dev/null +++ b/sharedmarczewski.h @@ -0,0 +1,28 @@ +#ifndef SHAREDMARCZEWSKI_H +#define SHAREDMARCZEWSKI_H + +/* + * sharedmarczewski.h + * Mothur + * + * Created by Thomas Ryabin on 4/8/09. + * Copyright 2009 __MyCompanyName__. All rights reserved. + * + */ + +#include "calculator.h" + +/***********************************************************************/ + +class SharedMarczewski : public Calculator { + +public: + SharedMarczewski() : Calculator("sharedmarczewski", 1, false) {}; + EstOutput getValues(SAbundVector*) {return data;}; + EstOutput getValues(vector); +private: +}; + +/***********************************************************************/ + +#endif \ No newline at end of file diff --git a/summarycommand.cpp b/summarycommand.cpp index 39972ce..bd8deb1 100644 --- a/summarycommand.cpp +++ b/summarycommand.cpp @@ -22,6 +22,7 @@ #include "qstat.h" #include "bergerparker.h" #include "bstick.h" +#include "goodscoverage.h" #include "coverage.h" //********************************************************************************************************************** @@ -67,6 +68,8 @@ SummaryCommand::SummaryCommand(){ sumCalculators.push_back(new Bootstrap()); }else if (globaldata->Estimators[i] == "nseqs") { sumCalculators.push_back(new NSeqs()); + }else if (globaldata->Estimators[i] == "goodscoverage") { + sumCalculators.push_back(new GoodsCoverage()); } } } diff --git a/summarysharedcommand.cpp b/summarysharedcommand.cpp index b3e6650..a4a345e 100644 --- a/summarysharedcommand.cpp +++ b/summarysharedcommand.cpp @@ -29,6 +29,8 @@ #include "sharedlennon.h" #include "sharedmorisitahorn.h" #include "sharedbraycurtis.h" +#include "sharedjackknife.h" +#include "sharedwhittaker.h" //********************************************************************************************************************** diff --git a/validcalculator.cpp b/validcalculator.cpp index 70299cb..f6fe280 100644 --- a/validcalculator.cpp +++ b/validcalculator.cpp @@ -186,9 +186,9 @@ void ValidCalculators::initialSingle() { single["logseries"] = "logseries"; single["qstat"] = "qstat"; single["bstick"] = "bstick"; + single["goodscoverage"] = "goodscoverage"; single["nseqs"] = "nseqs"; single["coverage"] = "coverage"; - single["default"] = "default"; } catch(exception& e) { @@ -280,6 +280,7 @@ void ValidCalculators::initialSummary() { summary["qstat"] = "qstat"; summary["bstick"] = "bstick"; summary["nseqs"] = "nseqs"; + summary["goodscoverage"]= "goodscoverage"; summary["coverage"] = "coverage"; summary["default"] = "default"; } diff --git a/validcommands.cpp b/validcommands.cpp index 94845c5..4c72dce 100644 --- a/validcommands.cpp +++ b/validcommands.cpp @@ -17,6 +17,7 @@ ValidCommands::ValidCommands() { commands["read.dist"] = "read.dist"; commands["read.otu"] = "read.otu"; commands["read.tree"] = "read.tree"; + commands["read.seqs"] = "read.seqs"; commands["bin.seqs"] = "bin.seqs"; commands["get.oturep"] = "get.oturep"; commands["cluster"] = "cluster"; @@ -40,6 +41,7 @@ ValidCommands::ValidCommands() { commands["bootstrap.shared"] = "bootstrap.shared"; commands["concensus"] = "concensus"; commands["help"] = "help"; + commands["filter.seqs"] = "filter.seqs"; commands["quit"] = "quit"; diff --git a/validparameter.cpp b/validparameter.cpp index 4234d2d..2d070cb 100644 --- a/validparameter.cpp +++ b/validparameter.cpp @@ -37,16 +37,13 @@ bool ValidParameters::isValidParameter(string parameter, string command, string bool valid = false; vector cParams = commandParameters[command]; int numParams = cParams.size(); - for(int i = 0; i < numParams; i++) - { - if(cParams.at(i).compare(parameter) == 0) - { + for(int i = 0; i < numParams; i++) { + if(cParams.at(i).compare(parameter) == 0) { valid = true; i = numParams; } } - if(!valid) - { + if(!valid) { cout << "'" << parameter << "' is not a valid parameter for the " << command << " command.\n"; cout << "The valid paramters for the " << command << " command are: "; for(int i = 0; i < numParams-1; i++) @@ -65,8 +62,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string vector values; splitAtDash(value, values); - for(int i = 0; i < values.size(); i++) - { + for(int i = 0; i < values.size(); i++) { value = values.at(i); valid = convertTest(value, pVal); @@ -79,12 +75,10 @@ bool ValidParameters::isValidParameter(string parameter, string command, string Special Cases *********************************************************************************************************/ - if(parameter.compare("precision") == 0) - { + if(parameter.compare("precision") == 0) { double logNum = log10((double)pVal); double diff = (double)((int)logNum - logNum); - if(diff != 0) - { + if(diff != 0) { cout << "The precision parameter can only take powers of 10 as a value (e.g. 10,1000,1000, etc.)\n"; return false; } @@ -110,8 +104,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string c = 0; else if(range.at(4).compare("only") == 0) c = 1; - else - { + else { cout << "The range can only be 'between' or 'only' the bounding numbers.\n"; return false; } @@ -120,8 +113,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string d = 0; else if(range.at(0).compare(">=") == 0 || range[3].compare("=>") == 0) d = 1; - else - { + else { cout << "The parameter value can only be '>', '>=', or '=>' the lower bounding number.\n"; return false; } @@ -130,8 +122,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string e = 0; else if(range.at(2).compare("<=") == 0 || range[4].compare("=<") == 0) e = 1; - else - { + else { cout << "The parameter value can only be '<', '<=', or '=<' the upper bounding number.\n"; return false; } @@ -141,24 +132,20 @@ bool ValidParameters::isValidParameter(string parameter, string command, string bool b0 = pVal < b; bool b1 = pVal <= b; - if(c != 1) - { - if(a != piSentinel && b == piSentinel) - { + if(c != 1) { + if(a != piSentinel && b == piSentinel) { if(d == 0) valid = a0; else valid = a1; } - else if(a == piSentinel && b != piSentinel) - { + else if(a == piSentinel && b != piSentinel) { if(e == 0) valid = b0; else valid = b1; } - else - { + else { if(d == 0 && e == 0) valid = (a0 && b0); else if(d == 0 && e == 1) @@ -169,8 +156,7 @@ bool ValidParameters::isValidParameter(string parameter, string command, string valid = (a1 && b1); } } - else - { + else { if(a != piSentinel && b == piSentinel) valid = (pVal == a); else if(a == piSentinel && b != piSentinel) @@ -180,30 +166,26 @@ bool ValidParameters::isValidParameter(string parameter, string command, string } - if(!valid) - { + if(!valid) { cout << "The '" << parameter << "' parameter needs to be "; if(c == 1) cout << "either '" << a << "' or '" << b << "'.\n"; - else - { - if(a != piSentinel) - { + else { + if(a != piSentinel) { cout << ">"; if(d != 0) cout << "="; cout << " '" << a << "'"; } if(b == piSentinel) - cout << ".\n"; + cout << "'.\n"; else if(a != piSentinel) cout << " and "; - if(b != piSentinel) - { + if(b != piSentinel) { cout << "<"; if(e != 0) cout << "="; - cout << " '" << b << ".\n"; + cout << " '" << b << "'.\n"; } } return false; @@ -238,6 +220,9 @@ void ValidParameters::initCommandParameters() { string readtreeArray[] = {"tree","group"}; commandParameters["read.tree"] = addParameters(readtreeArray, sizeof(readtreeArray)/sizeof(string)); + string readseqsArray[] = {"fasta","phylip","clustal","nexus","line"}; + commandParameters["read.seqs"] = addParameters(readseqsArray, sizeof(readseqsArray)/sizeof(string)); + string clusterArray[] = {"cutoff","precision","method"}; commandParameters["cluster"] = addParameters(clusterArray, sizeof(clusterArray)/sizeof(string)); @@ -285,6 +270,9 @@ void ValidParameters::initCommandParameters() { string heatmapArray[] = {"groups","line","label","sorted","scale"}; commandParameters["heatmap"] = addParameters(heatmapArray, sizeof(heatmapArray)/sizeof(string)); + + string filterseqsArray[] = {"trump", "soft", "filter"}; + commandParameters["filter.seqs"] = addParameters(filterseqsArray, sizeof(filterseqsArray)/sizeof(string)); string vennArray[] = {"groups","line","label","calc"}; commandParameters["venn"] = addParameters(vennArray, sizeof(vennArray)/sizeof(string)); @@ -345,7 +333,7 @@ void ValidParameters::initParameterRanges() { string jumbleArray[] = {">","0", "<","1", "only"}; parameterRanges["jumble"] = addParameters(jumbleArray, rangeSize); - string freqArray[] = {">","1", "<","NA", "between"}; + string freqArray[] = {">=","1", "<","NA", "between"}; parameterRanges["freq"] = addParameters(freqArray, rangeSize); //string lineArray[] = {">=","1", "<","NA", "between"}; @@ -353,6 +341,9 @@ void ValidParameters::initParameterRanges() { string abundArray[] = {">=","5", "<","NA", "between"}; parameterRanges["abund"] = addParameters(abundArray, rangeSize); + + string softArray[] = {">=","0", "<=","100", "between"}; + parameterRanges["soft"] = addParameters(softArray, rangeSize); } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the ValidParameters class Function isValidParameter. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; -- 2.39.2