A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+ A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
+ A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
+ A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; };
A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E1937161B201E00DB1A2A /* randomforest.cpp */; };
A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E193A161B289600DB1A2A /* rftreenode.cpp */; };
A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+ A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
+ A77B7184173D2240002163C2 /* sparcccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparcccommand.cpp; sourceTree = "<group>"; };
+ A77B7186173D4041002163C2 /* randomnumber.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomnumber.cpp; sourceTree = "<group>"; };
+ A77B7187173D4041002163C2 /* randomnumber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = randomnumber.h; sourceTree = "<group>"; };
+ A77B7189173D40E4002163C2 /* calcsparcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calcsparcc.cpp; sourceTree = "<group>"; };
+ A77B718A173D40E4002163C2 /* calcsparcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = calcsparcc.h; sourceTree = "<group>"; };
A77E1937161B201E00DB1A2A /* randomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomforest.cpp; sourceTree = "<group>"; };
A77E193A161B289600DB1A2A /* rftreenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rftreenode.cpp; sourceTree = "<group>"; };
A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
A7A61F1A130035C800E05B6B /* LICENSE */,
A70332B512D3A13400761E33 /* makefile */,
A7E9B65912D37EC300DA6239 /* averagelinkage.cpp */,
+ A77B718A173D40E4002163C2 /* calcsparcc.h */,
+ A77B7189173D40E4002163C2 /* calcsparcc.cpp */,
A7E9BA4F12D398D700DA6239 /* clearcut */,
A7E9B69812D37EC400DA6239 /* cluster.cpp */,
A7E9B69912D37EC400DA6239 /* cluster.hpp */,
A7E9B79C12D37EC400DA6239 /* progress.hpp */,
A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+ A77B7187173D4041002163C2 /* randomnumber.h */,
+ A77B7186173D4041002163C2 /* randomnumber.cpp */,
A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
A7386C191619C9FB00651424 /* randomforest */,
A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
A774101314695AF60098E6AC /* shhhseqscommand.cpp */,
A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */,
A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */,
+ A77B7183173D222F002163C2 /* sparcccommand.h */,
+ A77B7184173D2240002163C2 /* sparcccommand.cpp */,
A7E9B84012D37EC400DA6239 /* splitabundcommand.h */,
A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
+ A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */,
+ A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */,
+ A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- /dev/null
+//
+// runSparcc.cpp
+// PDSSparCC
+//
+// Created by Patrick Schloss on 10/31/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "calcSparcc.h"
+#include "linearalgebra.h"
+
+/**************************************************************************************************/
+
+CalcSparcc::CalcSparcc(vector<vector<float> > sharedVector, int maxIterations, int numSamplings, string method){
+ try {
+ m = MothurOut::getInstance();
+ numOTUs = (int)sharedVector[0].size();
+ numGroups = (int)sharedVector.size();
+ normalizationMethod = method;
+ int numOTUs = (int)sharedVector[0].size();
+
+ addPseudoCount(sharedVector);
+
+ vector<vector<vector<float> > > allCorrelations(numSamplings);
+
+ // float cycClockStart = clock();
+ // unsigned long long cycTimeStart = time(NULL);
+
+ for(int i=0;i<numSamplings;i++){
+ if (m->control_pressed) { break; }
+ vector<float> logFractions = getLogFractions(sharedVector, method);
+ getT_Matrix(logFractions); //this step is slow...
+ getT_Vector();
+ getD_Matrix();
+ vector<float> basisVariances = getBasisVariances(); //this step is slow...
+ vector<vector<float> > correlation = getBasisCorrelations(basisVariances);
+
+ excluded.resize(numOTUs);
+ for(int j=0;j<numOTUs;j++){ excluded[j].assign(numOTUs, 0); }
+
+ float maxRho = 1;
+ int excludeRow = -1;
+ int excludeColumn = -1;
+
+ int iter = 0;
+ while(maxRho > 0.10 && iter < maxIterations){
+ maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn);
+ excludeValues(excludeRow, excludeColumn);
+ vector<float> excludedBasisVariances = getBasisVariances();
+ correlation = getBasisCorrelations(excludedBasisVariances);
+ iter++;
+ }
+ allCorrelations[i] = correlation;
+ }
+
+ if (!m->control_pressed) {
+ if(numSamplings > 1){
+ getMedian(allCorrelations);
+ }
+ else{
+ median = allCorrelations[0];
+ }
+ }
+// cout << median[0][3] << '\t' << median[0][6] << endl;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "CalcSparcc");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::addPseudoCount(vector<vector<float> >& sharedVector){
+ try {
+ for(int i=0;i<numGroups;i++){ //iterate across the groups
+ if (m->control_pressed) { return; }
+ for(int j=0;j<numOTUs;j++){
+ sharedVector[i][j] += 1;
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "addPseudoCount");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getLogFractions(vector<vector<float> > sharedVector, string method){ //dirichlet by default
+ try {
+ vector<float> logSharedFractions(numGroups * numOTUs, 0);
+
+ if(method == "dirichlet"){
+ vector<float> alphas(numGroups);
+ for(int i=0;i<numGroups;i++){ //iterate across the groups
+ if (m->control_pressed) { return logSharedFractions; }
+ alphas = RNG.randomDirichlet(sharedVector[i]);
+
+ for(int j=0;j<numOTUs;j++){
+ logSharedFractions[i * numOTUs + j] = alphas[j];
+ }
+ }
+ }
+ else if(method == "relabund"){
+ for(int i=0;i<numGroups;i++){
+ if (m->control_pressed) { return logSharedFractions; }
+ float total = 0.0;
+ for(int j=0;j<numOTUs;j++){
+ total += sharedVector[i][j];
+ }
+ for(int j=0;j<numOTUs;j++){
+ logSharedFractions[i * numOTUs + j] = sharedVector[i][j]/total;
+ }
+ }
+ }
+
+ for(int i=0;i<logSharedFractions.size();i++){
+ logSharedFractions[i] = log(logSharedFractions[i]);
+ }
+
+ return logSharedFractions;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "addPseudoCount");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Matrix(vector<float> sharedFractions){
+ try {
+ tMatrix.resize(numOTUs * numOTUs, 0);
+
+ vector<float> diff(numGroups);
+
+ for(int j1=0;j1<numOTUs;j1++){
+ for(int j2=0;j2<j1;j2++){
+ if (m->control_pressed) { return; }
+ float mean = 0.0;
+ for(int i=0;i<numGroups;i++){
+ diff[i] = sharedFractions[i * numOTUs + j1] - sharedFractions[i * numOTUs + j2];
+ mean += diff[i];
+ }
+ mean /= float(numGroups);
+
+ float variance = 0.0;
+ for(int i=0;i<numGroups;i++){
+ variance += (diff[i] - mean) * (diff[i] - mean);
+ }
+ variance /= (float)(numGroups-1);
+
+ tMatrix[j1 * numOTUs + j2] = variance;
+ tMatrix[j2 * numOTUs + j1] = tMatrix[j1 * numOTUs + j2];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getT_Matrix");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Vector(){
+ try {
+ tVector.assign(numOTUs, 0);
+
+ for(int j1=0;j1<numOTUs;j1++){
+ if (m->control_pressed) { return; }
+ for(int j2=0;j2<numOTUs;j2++){
+ tVector[j1] += tMatrix[j1 * numOTUs + j2];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getT_Vector");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getD_Matrix(){
+ try {
+ float d = numOTUs - 1.0;
+
+ dMatrix.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return; }
+ dMatrix[i].resize(numOTUs, 1);
+ dMatrix[i][i] = d;
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getD_Matrix");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getBasisVariances(){
+ try {
+ LinearAlgebra LA;
+
+ vector<float> variances = LA.solveEquations(dMatrix, tVector);
+
+ for(int i=0;i<variances.size();i++){
+ if (m->control_pressed) { return variances; }
+ if(variances[i] < 0){ variances[i] = 1e-4; }
+ }
+
+ return variances;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getBasisVariances");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+vector<vector<float> > CalcSparcc::getBasisCorrelations(vector<float> basisVariance){
+ try {
+ vector<vector<float> > rho(numOTUs);
+ for(int i=0;i<numOTUs;i++){ rho[i].resize(numOTUs, 0); }
+
+ for(int i=0;i<numOTUs;i++){
+ float var_i = basisVariance[i];
+ float sqrt_var_i = sqrt(var_i);
+
+ rho[i][i] = 1.00;
+
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return rho; }
+ float var_j = basisVariance[j];
+
+ rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j));
+ if(rho[i][j] > 1.0) { rho[i][j] = 1.0; }
+ else if(rho[i][j] < -1.0) { rho[i][j] = -1.0; }
+
+ rho[j][i] = rho[i][j];
+
+ }
+ }
+
+ return rho;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getBasisCorrelations");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+float CalcSparcc::getExcludedPairs(vector<vector<float> > rho, int& maxRow, int& maxColumn){
+ try {
+ float maxRho = 0;
+ maxRow = -1;
+ maxColumn = -1;
+
+ for(int i=0;i<numOTUs;i++){
+
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return maxRho; }
+ float tester = abs(rho[i][j]);
+
+ if(tester > maxRho && excluded[i][j] != 1){
+ maxRho = tester;
+ maxRow = i;
+ maxColumn = j;
+ }
+ }
+
+ }
+
+ return maxRho;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getExcludedPairs");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){
+ try {
+ tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+ tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+
+ dMatrix[excludeRow][excludeColumn] = 0;
+ dMatrix[excludeColumn][excludeRow] = 0;
+ dMatrix[excludeRow][excludeRow]--;
+ dMatrix[excludeColumn][excludeColumn]--;
+
+ excluded[excludeRow][excludeColumn] = 1;
+ excluded[excludeColumn][excludeRow] = 1;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "excludeValues");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getMedian(vector<vector<vector<float> > > allCorrelations){
+ try {
+ int numSamples = (int)allCorrelations.size();
+ median.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){ median[i].assign(numOTUs, 1); }
+
+ vector<float> hold(numSamples);
+
+ for(int i=0;i<numOTUs;i++){
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return; }
+
+ for(int k=0;k<numSamples;k++){
+ hold[k] = allCorrelations[k][i][j];
+ }
+
+ sort(hold.begin(), hold.end());
+ median[i][j] = hold[int(numSamples * 0.5)];
+ median[j][i] = median[i][j];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getMedian");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
--- /dev/null
+
+#ifndef PDSSparCC_runSparcc_h
+#define PDSSparCC_runSparcc_h
+
+//
+// runSparcc.h
+// PDSSparCC
+//
+// Created by Patrick Schloss on 10/31/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+/**************************************************************************************************/
+
+//#include "sparcc.h"
+#include "randomnumber.h"
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class CalcSparcc {
+
+public:
+ CalcSparcc(vector<vector<float> >, int, int, string);
+ vector<vector<float> > getRho() { return median; }
+private:
+ MothurOut* m;
+ void addPseudoCount(vector<vector<float> >&);
+ vector<float> getLogFractions(vector<vector<float> >, string);
+ void getT_Matrix(vector<float>);
+
+
+ void getT_Vector();
+ void getD_Matrix();
+ vector<float> getBasisVariances();
+ vector<vector<float> > getBasisCorrelations(vector<float>);
+ float getExcludedPairs(vector<vector<float> >, int&, int&);
+ void excludeValues(int, int);
+ void getMedian(vector<vector<vector<float> > >);
+
+ vector<float> tMatrix;
+
+ vector<vector<float> > dMatrix;
+ vector<float> tVector;
+ vector<vector<int> > excluded;
+ vector<vector<float> > median;
+
+ int numOTUs;
+ int numGroups;
+ string normalizationMethod;
+
+ RandomNumberGenerator RNG;
+};
+
+#endif
+
+/**************************************************************************************************/
#include "removedistscommand.h"
#include "mergetaxsummarycommand.h"
#include "getmetacommunitycommand.h"
+#include "sparcccommand.h"
/*******************************************************/
commands["remove.dists"] = "remove.dists";
commands["merge.taxsummary"] = "merge.taxsummary";
commands["get.metacommunity"] = "get.metacommunity";
+ commands["sparcc"] = "sparcc";
}
else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); }
else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); }
+ else if(commandName == "sparcc") { command = new SparccCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); }
else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); }
+ else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); }
else if(commandName == "merge.taxsummary") { shellcommand = new MergeTaxSummaryCommand(); }
else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); }
+ else if(commandName == "sparcc") { shellcommand = new SparccCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
exit(1);
}
}
+/*********************************************************************************************************************************/
+vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
+ try {
+ int length = (int)b.size();
+ vector<double> x(length, 0);
+ vector<int> index(length);
+ for(int i=0;i<length;i++){ index[i] = i; }
+ float d;
+
+ ludcmp(A, index, d); if (m->control_pressed) { return b; }
+ lubksb(A, index, b);
+
+ return b;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "solveEquations");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
exit(1);
}
}
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
+ try {
+ double tiny = 1e-20;
+
+ int n = (int)A.size();
+ vector<float> vv(n, 0.0);
+ double temp;
+ int imax;
+
+ d = 1.0;
+
+ for(int i=0;i<n;i++){
+ float big = 0.0;
+ for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
+ if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
+ vv[i] = 1.0/big;
+ }
+
+ for(int j=0;j<n;j++){
+ if (m->control_pressed) { break; }
+ for(int i=0;i<j;i++){
+ float sum = A[i][j];
+ for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ }
+
+ float big = 0.0;
+ for(int i=j;i<n;i++){
+ float sum = A[i][j];
+ for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ float dum;
+ if((dum = vv[i] * fabs(sum)) >= big){
+ big = dum;
+ imax = i;
+ }
+ }
+ if(j != imax){
+ for(int k=0;k<n;k++){
+ float dum = A[imax][k];
+ A[imax][k] = A[j][k];
+ A[j][k] = dum;
+ }
+ d = -d;
+ vv[imax] = vv[j];
+ }
+ index[j] = imax;
+
+ if(A[j][j] == 0.0){ A[j][j] = tiny; }
+
+ if(j != n-1){
+ float dum = 1.0/A[j][j];
+ for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "ludcmp");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
+ try {
+ float total;
+ int n = (int)A.size();
+ int ii = 0;
+
+ for(int i=0;i<n;i++){
+ if (m->control_pressed) { break; }
+ int ip = index[i];
+ total = b[ip];
+ b[ip] = b[i];
+
+ if (ii != 0) {
+ for(int j=ii-1;j<i;j++){
+ total -= A[i][j] * b[j];
+ }
+ }
+ else if(total != 0){ ii = i+1; }
+ b[i] = total;
+ }
+ for(int i=n-1;i>=0;i--){
+ total = b[i];
+ for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
+ b[i] = total / A[i][i];
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "lubksb");
+ exit(1);
+ }
+}
+
/*********************************************************************************************************************************/
double calcKendallSig(double, double); //length, coeff.
vector<double> solveEquations(vector<vector<double> >, vector<double>);
+ vector<float> solveEquations(vector<vector<float> >, vector<float>);
vector<vector<double> > getInverse(vector<vector<double> >);
void ludcmp(vector<vector<double> >&, vector<int>&, double&);
void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+
+ void ludcmp(vector<vector<float> >&, vector<int>&, float&);
+ void lubksb(vector<vector<float> >&, vector<int>&, vector<float>&);
};
--- /dev/null
+#ifndef RANDOMNUMBER_H
+#define RANDOMNUMBER_H
+
+/*
+ * randomnumber.cpp
+ *
+ *
+ * Created by Pat Schloss on 7/6/11.
+ * Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "randomnumber.h"
+#include <cmath>
+
+/**************************************************************************************************/
+
+RandomNumberGenerator::RandomNumberGenerator(){
+ srand( (unsigned)time( NULL ) );
+}
+
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomUniform(){
+
+ float randUnif = 0.0000;
+
+ while(randUnif == 0.0000){
+
+ randUnif = rand() / (float)RAND_MAX;
+
+ }
+
+ return randUnif;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomExp(){
+
+ float randExp = 0.0000;
+
+ while(randExp == 0.0000){
+
+ randExp = -log(randomUniform());
+
+ }
+
+ return randExp;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomNorm(){
+
+ float x, y, rsquare;
+
+ do{
+ x = 2.0 * randomUniform() - 1.0;
+ y = 2.0 * randomUniform() - 1.0;
+
+ rsquare = x * x + y * y;
+ } while(rsquare >= 1.0 || rsquare == 0.0);
+
+ float fac = sqrt(-2.0 * log(rsquare)/rsquare);
+
+ return x * fac;
+}
+
+
+/**************************************************************************************************/
+/*
+ * Slightly modified version of:
+ *
+ * Mathlib : A C Library of Special Functions
+ * Copyright (C) 1998 Ross Ihaka
+ * Copyright (C) 2000-2005 The R Development Core Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, a copy is available at
+ * http://www.r-project.org/Licenses/
+ *
+ * SYNOPSIS
+ *
+ * #include <Rmath.h>
+ * float rgamma(float a, float scale);
+ *
+ * DESCRIPTION
+ *
+ * Random variates from the gamma distribution.
+ *
+ * REFERENCES
+ *
+ * [1] Shape parameter a >= 1. Algorithm GD in:
+ *
+ * Ahrens, J.H. and Dieter, U. (1982).
+ * Generating gamma variates by a modified
+ * rejection technique.
+ * Comm. ACM, 25, 47-54.
+ *
+ *
+ * [2] Shape parameter 0 < a < 1. Algorithm GS in:
+ *
+ * Ahrens, J.H. and Dieter, U. (1974).
+ * Computer methods for sampling from gamma, beta,
+ * poisson and binomial distributions.
+ * Computing, 12, 223-246.
+ *
+ * Input: a = parameter (mean) of the standard gamma distribution.
+ * Output: a variate from the gamma(a)-distribution
+ */
+
+float RandomNumberGenerator::randomGamma(float a)
+{
+ /* Constants : */
+ const static float sqrt32 = 5.656854;
+ const static float exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
+ float scale = 1.0;
+
+ /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
+ * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
+ * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
+ */
+ const static float q1 = 0.04166669;
+ const static float q2 = 0.02083148;
+ const static float q3 = 0.00801191;
+ const static float q4 = 0.00144121;
+ const static float q5 = -7.388e-5;
+ const static float q6 = 2.4511e-4;
+ const static float q7 = 2.424e-4;
+
+ const static float a1 = 0.3333333;
+ const static float a2 = -0.250003;
+ const static float a3 = 0.2000062;
+ const static float a4 = -0.1662921;
+ const static float a5 = 0.1423657;
+ const static float a6 = -0.1367177;
+ const static float a7 = 0.1233795;
+
+ /* State variables [FIXME for threading!] :*/
+ static float aa = 0.;
+ static float aaa = 0.;
+ static float s, s2, d; /* no. 1 (step 1) */
+ static float q0, b, si, c;/* no. 2 (step 4) */
+
+ float e, p, q, r, t, u, v, w, x, ret_val;
+
+ if (a <= 0.0 || scale <= 0.0){ cout << "error alpha or scale parameter are less than zero." << endl; exit(1); }
+
+ if (a < 1.) { /* GS algorithm for parameters a < 1 */
+ e = 1.0 + exp_m1 * a;
+ for(;;) {
+ p = e * randomUniform();
+ if (p >= 1.0) {
+ x = -log((e - p) / a);
+ if (randomExp() >= (1.0 - a) * log(x))
+ break;
+ } else {
+ x = exp(log(p) / a);
+ if (randomExp() >= x)
+ break;
+ }
+ }
+ return scale * x;
+ }
+
+ /* --- a >= 1 : GD algorithm --- */
+
+ /* Step 1: Recalculations of s2, s, d if a has changed */
+ if (a != aa) {
+ aa = a;
+ s2 = a - 0.5;
+ s = sqrt(s2);
+ d = sqrt32 - s * 12.0;
+ }
+ /* Step 2: t = standard normal deviate,
+ x = (s,1/2) -normal deviate. */
+
+ /* immediate acceptance (i) */
+ t = randomNorm();
+ x = s + 0.5 * t;
+ ret_val = x * x;
+ if (t >= 0.0)
+ return scale * ret_val;
+
+ /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
+ u = randomUniform();
+ if (d * u <= t * t * t)
+ return scale * ret_val;
+
+ /* Step 4: recalculations of q0, b, si, c if necessary */
+
+ if (a != aaa) {
+ aaa = a;
+ r = 1.0 / a;
+ q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
+ + q2) * r + q1) * r;
+
+ /* Approximation depending on size of parameter a */
+ /* The constants in the expressions for b, si and c */
+ /* were established by numerical experiments */
+
+ if (a <= 3.686) {
+ b = 0.463 + s + 0.178 * s2;
+ si = 1.235;
+ c = 0.195 / s - 0.079 + 0.16 * s;
+ } else if (a <= 13.022) {
+ b = 1.654 + 0.0076 * s2;
+ si = 1.68 / s + 0.275;
+ c = 0.062 / s + 0.024;
+ } else {
+ b = 1.77;
+ si = 0.75;
+ c = 0.1515 / s;
+ }
+ }
+ /* Step 5: no quotient test if x not positive */
+
+ if (x > 0.0) {
+ /* Step 6: calculation of v and quotient q */
+ v = t / (s + s);
+ if (fabs(v) <= 0.25)
+ q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
+ + a3) * v + a2) * v + a1) * v;
+ else
+ q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+
+
+ /* Step 7: quotient acceptance (q) */
+ if (log(1.0 - u) <= q)
+ return scale * ret_val;
+ }
+
+ for(;;) {
+ /* Step 8: e = standard exponential deviate
+ * u = 0,1 -uniform deviate
+ * t = (b,si)-float exponential (laplace) sample */
+ e = randomExp();
+ u = randomUniform();
+ u = u + u - 1.0;
+ if (u < 0.0)
+ t = b - si * e;
+ else
+ t = b + si * e;
+ /* Step 9: rejection if t < tau(1) = -0.71874483771719 */
+ if (t >= -0.71874483771719) {
+ /* Step 10: calculation of v and quotient q */
+ v = t / (s + s);
+ if (fabs(v) <= 0.25)
+ q = q0 + 0.5 * t * t *
+ ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
+ + a2) * v + a1) * v;
+ else
+ q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+ /* Step 11: hat acceptance (h) */
+ /* (if q not positive go to step 8) */
+ if (q > 0.0) {
+ w = expm1(q);
+ /* ^^^^^ original code had approximation with rel.err < 2e-7 */
+ /* if t is rejected sample again at step 8 */
+ if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
+ break;
+ }
+ }
+ } /* for(;;) .. until `t' is accepted */
+ x = s + 0.5 * t;
+ return scale * x * x;
+}
+
+/**************************************************************************************************/
+//
+// essentially swiped from http://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
+//
+/**************************************************************************************************/
+
+vector<float> RandomNumberGenerator::randomDirichlet(vector<float> alphas){
+
+ int nAlphas = (int)alphas.size();
+ vector<float> dirs(nAlphas, 0.0000);
+
+ float sum = 0.0000;
+ for(int i=0;i<nAlphas;i++){
+ dirs[i] = randomGamma(alphas[i]);
+ sum += dirs[i];
+ }
+
+ for(int i=0;i<nAlphas;i++){
+ dirs[i] /= sum;
+ }
+
+ return dirs;
+
+}
+
+/**************************************************************************************************/
+
+#endif
--- /dev/null
+#ifndef RANDOMNUMBER
+#define RANDOMNUMBER
+
+/*
+ * randomnumber.h
+ *
+ *
+ * Created by Pat Schloss on 7/6/11.
+ * Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+/**************************************************************************************************/
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class RandomNumberGenerator {
+
+public:
+ RandomNumberGenerator();
+ float randomUniform();
+ float randomExp();
+ float randomNorm();
+ float randomGamma(float);
+ vector<float> randomDirichlet(vector<float> alphas);
+
+};
+
+/**************************************************************************************************/
+
+#endif
//make sure groups are valid
//takes care of user setting groupNames that are invalid or setting groups=all
- SharedUtil* util = new SharedUtil();
vector<string> namesGroups = groupMap->getNamesOfGroups();
- util->setGroups(Groups, namesGroups);
- delete util;
-
+ vector<string> checkedGroups;
+ for (int i = 0; i < Groups.size(); i++) {
+ if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
+ else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
+ }
+
+ if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
+ else {
+ Groups = checkedGroups;
+ m->setGroups(Groups);
+ }
+
//fill names with names of sequences that are from the groups we want to remove
fillNames();
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- flowOrder = validParameter.validFile(parameters, "order", false);
- if (flowOrder == "not found"){ flowOrder = "TACG"; }
- else if(flowOrder.length() != 4){
- m->mothurOut("The value of the order option must be four bases long\n");
- }
+ temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ else {
+ if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
+ else if(toupper(temp[0]) == 'B'){
+ flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ else if(toupper(temp[0]) == 'I'){
+ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
+ else {
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ }
+
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
m->mothurConvert(temp, cutoff);
return data[index].abundance;
}
/***********************************************************************/
+//returns vector of abundances
+vector<float> SharedRAbundFloatVector::getAbundances(){
+ vector<float> abunds;
+ for (int i = 0; i < data.size(); i++) {
+ abunds.push_back(data[i].abundance);
+ }
+
+ return abunds;
+}
+/***********************************************************************/
individualFloat SharedRAbundFloatVector::get(int index){
return data[index];
}
individualFloat get(int);
vector <individual> getData();
float getAbundance(int);
+ vector<float> getAbundances();
void push_front(float, int, string); //abundance, otu, groupname
void insert(float, int, string); //abundance, otu, groupname
void push_back(float, string); //abundance, groupname
--- /dev/null
+//
+// sparcccommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 5/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sparcccommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> SparccCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
+ CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
+ CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
+ CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string SparccCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The sparcc command allows you to ....\n";
+ helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n";
+ helpString += "The samplings parameter is used to .... Default=20.\n";
+ helpString += "The iterations parameter is used to ....Default=10.\n";
+ helpString += "The permutations parameter is used to ....Default=1000.\n";
+ helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n";
+ helpString += "The default value for groups is all the groups in your sharedfile.\n";
+ helpString += "The label parameter is used to analyze specific labels in your shared file.\n";
+ helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n";
+ helpString += "sparcc(shared=final.an.shared)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string SparccCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "corr") { pattern = "[filename],[distance],sparcc_correlation"; }
+ else if (type == "pvalue") { pattern = "[filename],[distance],sparcc_pvalue"; }
+ else if (type == "sparccrelabund") { pattern = "[filename],[distance],sparcc_relabund"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["corr"] = tempOutNames;
+ outputTypes["pvalue"] = tempOutNames;
+ outputTypes["sparccrelabund"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "SparccCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines = 1;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
+ outputTypes["pvalue"] = tempOutNames;
+ outputTypes["sparccrelabund"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ string path;
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ //check for parameters
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+ }
+
+ normalizeMethod = validParameter.validFile(parameters, "method", false);
+ if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; }
+ if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { }
+ else { m->mothurOut(normalizeMethod + " is not a valid method. Valid methods are dirichlet and relabund."); m->mothurOutEndLine(); abort = true; }
+
+
+ string temp = validParameter.validFile(parameters, "samplings", false); if (temp == "not found"){ temp = "20"; }
+ m->mothurConvert(temp, numSamplings);
+
+ if(normalizeMethod == "relabund"){ numSamplings = 1; }
+
+ temp = validParameter.validFile(parameters, "iterations", false); if (temp == "not found"){ temp = "10"; }
+ m->mothurConvert(temp, maxIterations);
+
+ temp = validParameter.validFile(parameters, "permutations", false); if (temp == "not found"){ temp = "1000"; }
+ m->mothurConvert(temp, numPermutations);
+
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ m->mothurConvert(temp, processors);
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else { m->splitAtDash(groups, Groups); }
+ m->setGroups(Groups);
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "SparccCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SparccCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ int start = time(NULL);
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to process.");
+ m->mothurOutEndLine();
+ m->mothurOutEndLine();
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "execute");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<vector<float> > SparccCommand::shuffleSharedVector(vector<vector<float> >& sharedVector){
+ try {
+ int numGroups = (int)sharedVector.size();
+ int numOTUs = (int)sharedVector[0].size();
+
+ vector<vector<float> > shuffledVector = sharedVector;
+
+ for(int i=0;i<numGroups;i++){
+ for(int j=0;j<numOTUs;j++){
+ shuffledVector[i][j] = sharedVector[rand()%numGroups][j];
+ }
+ }
+
+ return shuffledVector;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
+ try {
+ cout.setf(ios::fixed, ios::floatfield);
+ cout.setf(ios::showpoint);
+
+ vector<vector<float> > sharedVector;
+ vector<string> otuNames = m->currentBinLabels;
+
+ //fill sharedVector to pass to CalcSparcc
+ for (int i = 0; i < lookup.size(); i++) {
+ vector<int> abunds = lookup[i]->getAbundances();
+ vector<float> temp;
+ for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
+ sharedVector.push_back(temp);
+ }
+ int numOTUs = (int)sharedVector[0].size();
+ int numGroups = lookup.size();
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+
+
+ string relAbundFileName = getOutputFileName("sparccrelabund", variables);
+ ofstream relAbundFile;
+ m->openOutputFile(relAbundFileName, relAbundFile);
+ outputNames.push_back(relAbundFileName); outputTypes["sparccrelabund"].push_back(relAbundFileName);
+
+ relAbundFile << "OTU\taveRelAbund\n";
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { relAbundFile.close(); return 0; }
+
+ double relAbund = 0.0000;
+ for(int j=0;j<numGroups;j++){
+ relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
+ }
+ relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
+ }
+ relAbundFile.close();
+
+ CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
+ vector<vector<float> > origCorrMatrix = originalData.getRho();
+
+ string correlationFileName = getOutputFileName("corr", variables);
+ ofstream correlationFile;
+ m->openOutputFile(correlationFileName, correlationFile);
+ outputNames.push_back(correlationFileName); outputTypes["corr"].push_back(correlationFileName);
+ correlationFile.setf(ios::fixed, ios::floatfield);
+ correlationFile.setf(ios::showpoint);
+
+ for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i]; } correlationFile << endl;
+ for(int i=0;i<numOTUs;i++){
+ correlationFile << otuNames[i];
+ for(int j=0;j<numOTUs;j++){
+ correlationFile << '\t' << origCorrMatrix[i][j];
+ }
+ correlationFile << endl;
+ }
+
+
+ if(numPermutations != 0){
+ vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
+
+ if (m->control_pressed) { return 0; }
+
+ string pValueFileName = getOutputFileName("pvalue", variables);
+ ofstream pValueFile;
+ m->openOutputFile(pValueFileName, pValueFile);
+ outputNames.push_back(pValueFileName); outputTypes["pvalue"].push_back(pValueFileName);
+ pValueFile.setf(ios::fixed, ios::floatfield);
+ pValueFile.setf(ios::showpoint);
+
+ for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i]; } pValueFile << endl;
+ for(int i=0;i<numOTUs;i++){
+ pValueFile << otuNames[i];
+ for(int j=0;j<numOTUs;j++){
+ pValueFile << '\t' << pValues[i][j];
+ }
+ pValueFile << endl;
+ }
+ }
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::createProcesses(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix){
+ try {
+ int numOTUs = sharedVector[0].size();
+ vector<vector<float> > pValues;
+
+ if(processors == 1){
+ pValues = driver(sharedVector, origCorrMatrix, numPermutations);
+ }else{
+ //divide iters between processors
+ vector<int> procIters;
+ int numItersPerProcessor = numPermutations / processors;
+
+ //divide iters between processes
+ for (int h = 0; h < processors; h++) {
+ if(h == processors - 1){ numItersPerProcessor = numPermutations - h * numItersPerProcessor; }
+ procIters.push_back(numItersPerProcessor);
+ }
+
+ vector<int> processIDS;
+ int process = 1;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ pValues = driver(sharedVector, origCorrMatrix, procIters[process]);
+
+ //pass pvalues to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".pvalues.temp";
+ m->openOutputFile(tempFile, out);
+
+ //pass values
+ for (int i = 0; i < pValues.size(); i++) {
+ for (int j = 0; j < pValues[i].size(); j++) {
+ out << pValues[i][j] << '\t';
+ }
+ out << endl;
+ }
+ out << endl;
+
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ //combine results
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".pvalues.temp";
+ m->openInputFile(tempFile, in);
+
+ ////// to do ///////////
+ int numTemp; numTemp = 0;
+
+ for (int j = 0; j < pValues.size(); j++) {
+ for (int k = 0; k < pValues.size(); k++) {
+ in >> numTemp; m->gobble(in);
+ pValues[j][k] += numTemp;
+ }
+ m->gobble(in);
+ }
+ in.close(); m->mothurRemove(tempFile);
+ }
+#else
+
+ //fill in functions
+ vector<sparccData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy so we don't get access violations
+ vector< vector<float> > copySharedVector = sharedVector;
+ vector< vector<float> > copyOrig = origCorrMatrix;
+
+ sparccData* temp = new sparccData(m, procIters[i], copySharedVector, copyOrig, numSamplings, maxIterations, numPermutations, normalizeMethod);
+ pDataArray.push_back(temp);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MySparccThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //do my part
+ pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->pValues.size(); j++) {
+ for (int k = 0; k < pDataArray[i]->pValues[j].size(); k++) {
+ pValues[j][k] += pDataArray[i]->pValues[j][k];
+ }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+#endif
+ }
+
+ for(int i=0;i<numOTUs;i++){
+ pValues[i][i] = 1;
+ for(int j=0;j<i;j++){
+ pValues[i][j] /= (double)numPermutations;
+ pValues[j][i] = pValues[i][j];
+ }
+ }
+
+ return pValues;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "createProcesses");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){
+ try {
+ int numOTUs = sharedVector[0].size();
+ vector<vector<float> > sharedShuffled = sharedVector;
+ vector<vector<float> > pValues(numOTUs);
+ for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0); }
+
+ for(int i=0;i<numPerms;i++){
+ if (m->control_pressed) { return pValues; }
+ sharedShuffled = shuffleSharedVector(sharedVector);
+ CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod);
+ vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<j;k++){
+ double randValue = permuteCorrMatrix[j][k];
+ double observedValue = origCorrMatrix[j][k];
+ if(observedValue >= 0 && randValue > observedValue) { pValues[j][k]++; }//this method seems to deflate the
+ else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values
+ }
+ }
+ if((i+1) % (int)(numPermutations * 0.05) == 0){ cout << i+1 << endl; }
+ }
+
+ return pValues;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "driver");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
--- /dev/null
+//
+// sparcccommand.h
+// Mothur
+//
+// Created by SarahsWork on 5/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sparcccommand_h
+#define Mothur_sparcccommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "calcsparcc.h"
+
+/**************************************************************************************************/
+
+class SparccCommand : public Command {
+public:
+ SparccCommand(string);
+ SparccCommand();
+ ~SparccCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "sparcc"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+ //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+ string getHelpString();
+ string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; }
+ string getDescription() { return "brief description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir, sharedfile, normalizeMethod;
+ int numSamplings, maxIterations, numPermutations, processors;
+ set<string> labels;
+ vector<string> Groups;
+ vector<string> outputNames;
+
+ int process(vector<SharedRAbundVector*>&);
+ vector<vector<float> > createProcesses(vector<vector<float> >&, vector<vector<float> >&);
+ vector<vector<float> > driver(vector<vector<float> >&, vector<vector<float> >&, int);
+ vector<vector<float> > shuffleSharedVector(vector<vector<float> >&);
+};
+
+/**************************************************************************************************/
+
+struct sparccData {
+ MothurOut* m;
+ int numPerms;
+ vector< vector<float> > sharedVector;
+ vector< vector<float> > origCorrMatrix;
+ vector<vector<float> > pValues;
+ int numSamplings, maxIterations, numPermutations;
+ string normalizeMethod;
+
+ sparccData(){}
+ sparccData(MothurOut* mout, int it, vector< vector<float> > cs, vector< vector<float> > co, int ns, int mi, int np, string nm) {
+ m = mout;
+ numPerms = it;
+ sharedVector = cs;
+ origCorrMatrix = co;
+ numSamplings = ns;
+ maxIterations = mi;
+ numPermutations = np;
+ normalizeMethod = nm;
+ }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySparccThreadFunction(LPVOID lpParam){
+ sparccData* pDataArray;
+ pDataArray = (sparccData*)lpParam;
+
+ try {
+
+ int numOTUs = pDataArray->sharedVector[0].size();
+ vector<vector<float> > sharedShuffled = pDataArray->sharedVector;
+ pDataArray->pValues.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){ pDataArray->pValues[i].assign(numOTUs, 0); }
+
+ for(int i=0;i<pDataArray->numPerms;i++){
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //sharedShuffled = shuffleSharedVector(sharedVector);
+ //////////////////////////////////////////////////////////
+ int numGroups = (int)pDataArray->sharedVector.size();
+ sharedShuffled = pDataArray->sharedVector;
+
+ for(int k=0;k<numGroups;k++){
+ for(int j=0;j<numOTUs;j++){
+ sharedShuffled[k][j] = pDataArray->sharedVector[rand()%numGroups][j];
+ }
+ }
+ /////////////////////////////////////////////////////////
+
+ CalcSparcc permutedData(sharedShuffled, pDataArray->maxIterations, pDataArray->numSamplings, pDataArray->normalizeMethod);
+ vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<j;k++){
+ double randValue = permuteCorrMatrix[j][k];
+ double observedValue = pDataArray->origCorrMatrix[j][k];
+ if(observedValue >= 0 && randValue > observedValue) { pDataArray->pValues[j][k]++; }//this method seems to deflate the
+ else if(observedValue < 0 && randValue < observedValue){ pDataArray->pValues[j][k]++; }//pvalues of small rho values
+ }
+ }
+ if((i+1) % (int)(pDataArray->numPermutations * 0.05) == 0){ cout << i+1 << endl; }
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "SparccCommand", "MySparccThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
+/**************************************************************************************************/
+
+
+
+
+#endif
}
copyGroups.clear();
-
+
//process each distance file
for (int i = 0; i < numGroups; i++) {
else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
}
+ //restore old fasta file name since dist.seqs overwrites it with the temp files
+ m->setFastaFile(fastafile);
+
vector<string> tempDistFiles;
for(int i=0;i<numGroups;i++){
if (outputDir == "") { outputDir = m->hasPath(fastafile); }
if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
}
-
numGroups = lookup.size();
+
+ //sanity check to make sure processors < numComparisions
+ int numDists = 0;
+ for(int i=0;i<numGroups;i++){
+ for(int j=0;j<i;j++){
+ numDists++;
+ if (numDists > processors) { break; }
+ }
+ }
+ if (numDists < processors) { processors = numDists; }
+
lines.resize(processors);
for (int i = 0; i < processors; i++) {
lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
thisItersLookup.clear();
for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}
+
+ if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
}
if (m->debug) { m->mothurOut("[DEBUG]: done with iters.\n"); }