A77410F614697C300098E6AC /* seqnoise.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77410F414697C300098E6AC /* seqnoise.cpp */; };
A778FE6B134CA6CA00C0BA33 /* getcommandinfocommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */; };
A77A221F139001B600B0BE70 /* deuniquetreecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */; };
+ A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7184173D2240002163C2 /* sparcccommand.cpp */; };
+ A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7186173D4041002163C2 /* randomnumber.cpp */; };
+ A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77B7189173D40E4002163C2 /* calcsparcc.cpp */; };
A77E1938161B201E00DB1A2A /* randomforest.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E1937161B201E00DB1A2A /* randomforest.cpp */; };
A77E193B161B289600DB1A2A /* rftreenode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77E193A161B289600DB1A2A /* rftreenode.cpp */; };
A77EBD2F1523709100ED407C /* createdatabasecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A77EBD2E1523709100ED407C /* createdatabasecommand.cpp */; };
A7C7DAB915DA758B0059B0CF /* sffmultiplecommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7C7DAB815DA758B0059B0CF /* sffmultiplecommand.cpp */; };
A7D755DA1535F679009BF21A /* treereader.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7D755D91535F679009BF21A /* treereader.cpp */; };
A7E0243D15B4520A00A5F046 /* sparsedistancematrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */; };
+ A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */; };
A7E9B88112D37EC400DA6239 /* ace.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B64F12D37EC300DA6239 /* ace.cpp */; };
A7E9B88212D37EC400DA6239 /* aligncommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65112D37EC300DA6239 /* aligncommand.cpp */; };
A7E9B88312D37EC400DA6239 /* alignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = A7E9B65312D37EC300DA6239 /* alignment.cpp */; };
A778FE6A134CA6CA00C0BA33 /* getcommandinfocommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getcommandinfocommand.cpp; sourceTree = "<group>"; };
A77A221D139001B600B0BE70 /* deuniquetreecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = deuniquetreecommand.h; sourceTree = "<group>"; };
A77A221E139001B600B0BE70 /* deuniquetreecommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = deuniquetreecommand.cpp; sourceTree = "<group>"; };
+ A77B7183173D222F002163C2 /* sparcccommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = sparcccommand.h; sourceTree = "<group>"; };
+ A77B7184173D2240002163C2 /* sparcccommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparcccommand.cpp; sourceTree = "<group>"; };
+ A77B7186173D4041002163C2 /* randomnumber.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomnumber.cpp; sourceTree = "<group>"; };
+ A77B7187173D4041002163C2 /* randomnumber.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = randomnumber.h; sourceTree = "<group>"; };
+ A77B7189173D40E4002163C2 /* calcsparcc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = calcsparcc.cpp; sourceTree = "<group>"; };
+ A77B718A173D40E4002163C2 /* calcsparcc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = calcsparcc.h; sourceTree = "<group>"; };
A77E1937161B201E00DB1A2A /* randomforest.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = randomforest.cpp; sourceTree = "<group>"; };
A77E193A161B289600DB1A2A /* rftreenode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rftreenode.cpp; sourceTree = "<group>"; };
A77EBD2C1523707F00ED407C /* createdatabasecommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = createdatabasecommand.h; sourceTree = "<group>"; };
A7DAAFA3133A254E003956EB /* commandparameter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = commandparameter.h; sourceTree = "<group>"; };
A7E0243C15B4520A00A5F046 /* sparsedistancematrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = sparsedistancematrix.cpp; sourceTree = "<group>"; };
A7E0243F15B4522000A5F046 /* sparsedistancematrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = sparsedistancematrix.h; sourceTree = "<group>"; };
+ A7E6F69C17427CF2006775E2 /* makelookupcommand.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = makelookupcommand.h; sourceTree = "<group>"; };
+ A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = makelookupcommand.cpp; sourceTree = "<group>"; };
A7E9B64F12D37EC300DA6239 /* ace.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ace.cpp; sourceTree = "<group>"; };
A7E9B65012D37EC300DA6239 /* ace.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ace.h; sourceTree = "<group>"; };
A7E9B65112D37EC300DA6239 /* aligncommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = aligncommand.cpp; sourceTree = "<group>"; };
A7A61F1A130035C800E05B6B /* LICENSE */,
A70332B512D3A13400761E33 /* makefile */,
A7E9B65912D37EC300DA6239 /* averagelinkage.cpp */,
+ A77B718A173D40E4002163C2 /* calcsparcc.h */,
+ A77B7189173D40E4002163C2 /* calcsparcc.cpp */,
A7E9BA4F12D398D700DA6239 /* clearcut */,
A7E9B69812D37EC400DA6239 /* cluster.cpp */,
A7E9B69912D37EC400DA6239 /* cluster.hpp */,
A7E9B79C12D37EC400DA6239 /* progress.hpp */,
A7548FAF171440ED00B1F05A /* qFinderDMM.h */,
A7548FAE171440EC00B1F05A /* qFinderDMM.cpp */,
+ A77B7187173D4041002163C2 /* randomnumber.h */,
+ A77B7186173D4041002163C2 /* randomnumber.cpp */,
A7E9B7A512D37EC400DA6239 /* rarecalc.cpp */,
A7386C191619C9FB00651424 /* randomforest */,
A7E9B7A612D37EC400DA6239 /* rarecalc.h */,
A799F5B81309A3E000AEEFA0 /* makefastqcommand.cpp */,
A7E9B74412D37EC400DA6239 /* makegroupcommand.h */,
A7E9B74312D37EC400DA6239 /* makegroupcommand.cpp */,
+ A7E6F69C17427CF2006775E2 /* makelookupcommand.h */,
+ A7E6F69D17427D06006775E2 /* makelookupcommand.cpp */,
A7E9B74A12D37EC400DA6239 /* matrixoutputcommand.h */,
A7E9B74912D37EC400DA6239 /* matrixoutputcommand.cpp */,
A7E9B75412D37EC400DA6239 /* mergefilecommand.h */,
A774101314695AF60098E6AC /* shhhseqscommand.cpp */,
A7A32DAC14DC43D10001D2E5 /* sortseqscommand.h */,
A7A32DA914DC43B00001D2E5 /* sortseqscommand.cpp */,
+ A77B7183173D222F002163C2 /* sparcccommand.h */,
+ A77B7184173D2240002163C2 /* sparcccommand.cpp */,
A7E9B84012D37EC400DA6239 /* splitabundcommand.h */,
A7E9B83F12D37EC400DA6239 /* splitabundcommand.cpp */,
A7E9B84212D37EC400DA6239 /* splitgroupscommand.h */,
A799314B16CBD0CD0017E888 /* mergetaxsummarycommand.cpp in Sources */,
A7548FAD17142EBC00B1F05A /* getmetacommunitycommand.cpp in Sources */,
A7548FB0171440ED00B1F05A /* qFinderDMM.cpp in Sources */,
+ A77B7185173D2240002163C2 /* sparcccommand.cpp in Sources */,
+ A77B7188173D4042002163C2 /* randomnumber.cpp in Sources */,
+ A77B718B173D40E5002163C2 /* calcsparcc.cpp in Sources */,
+ A7E6F69E17427D06006775E2 /* makelookupcommand.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- /dev/null
+//
+// runSparcc.cpp
+// PDSSparCC
+//
+// Created by Patrick Schloss on 10/31/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+#include "calcSparcc.h"
+#include "linearalgebra.h"
+
+/**************************************************************************************************/
+
+CalcSparcc::CalcSparcc(vector<vector<float> > sharedVector, int maxIterations, int numSamplings, string method){
+ try {
+ m = MothurOut::getInstance();
+ numOTUs = (int)sharedVector[0].size();
+ numGroups = (int)sharedVector.size();
+ normalizationMethod = method;
+ int numOTUs = (int)sharedVector[0].size();
+
+ addPseudoCount(sharedVector);
+
+ vector<vector<vector<float> > > allCorrelations(numSamplings);
+
+ // float cycClockStart = clock();
+ // unsigned long long cycTimeStart = time(NULL);
+
+ for(int i=0;i<numSamplings;i++){
+ if (m->control_pressed) { break; }
+ vector<float> logFractions = getLogFractions(sharedVector, method);
+ getT_Matrix(logFractions); //this step is slow...
+ getT_Vector();
+ getD_Matrix();
+ vector<float> basisVariances = getBasisVariances(); //this step is slow...
+ vector<vector<float> > correlation = getBasisCorrelations(basisVariances);
+
+ excluded.resize(numOTUs);
+ for(int j=0;j<numOTUs;j++){ excluded[j].assign(numOTUs, 0); }
+
+ float maxRho = 1;
+ int excludeRow = -1;
+ int excludeColumn = -1;
+
+ int iter = 0;
+ while(maxRho > 0.10 && iter < maxIterations){
+ maxRho = getExcludedPairs(correlation, excludeRow, excludeColumn);
+ excludeValues(excludeRow, excludeColumn);
+ vector<float> excludedBasisVariances = getBasisVariances();
+ correlation = getBasisCorrelations(excludedBasisVariances);
+ iter++;
+ }
+ allCorrelations[i] = correlation;
+ }
+
+ if (!m->control_pressed) {
+ if(numSamplings > 1){
+ getMedian(allCorrelations);
+ }
+ else{
+ median = allCorrelations[0];
+ }
+ }
+// cout << median[0][3] << '\t' << median[0][6] << endl;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "CalcSparcc");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::addPseudoCount(vector<vector<float> >& sharedVector){
+ try {
+ for(int i=0;i<numGroups;i++){ //iterate across the groups
+ if (m->control_pressed) { return; }
+ for(int j=0;j<numOTUs;j++){
+ sharedVector[i][j] += 1;
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "addPseudoCount");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getLogFractions(vector<vector<float> > sharedVector, string method){ //dirichlet by default
+ try {
+ vector<float> logSharedFractions(numGroups * numOTUs, 0);
+
+ if(method == "dirichlet"){
+ vector<float> alphas(numGroups);
+ for(int i=0;i<numGroups;i++){ //iterate across the groups
+ if (m->control_pressed) { return logSharedFractions; }
+ alphas = RNG.randomDirichlet(sharedVector[i]);
+
+ for(int j=0;j<numOTUs;j++){
+ logSharedFractions[i * numOTUs + j] = alphas[j];
+ }
+ }
+ }
+ else if(method == "relabund"){
+ for(int i=0;i<numGroups;i++){
+ if (m->control_pressed) { return logSharedFractions; }
+ float total = 0.0;
+ for(int j=0;j<numOTUs;j++){
+ total += sharedVector[i][j];
+ }
+ for(int j=0;j<numOTUs;j++){
+ logSharedFractions[i * numOTUs + j] = sharedVector[i][j]/total;
+ }
+ }
+ }
+
+ for(int i=0;i<logSharedFractions.size();i++){
+ logSharedFractions[i] = log(logSharedFractions[i]);
+ }
+
+ return logSharedFractions;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "addPseudoCount");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Matrix(vector<float> sharedFractions){
+ try {
+ tMatrix.resize(numOTUs * numOTUs, 0);
+
+ vector<float> diff(numGroups);
+
+ for(int j1=0;j1<numOTUs;j1++){
+ for(int j2=0;j2<j1;j2++){
+ if (m->control_pressed) { return; }
+ float mean = 0.0;
+ for(int i=0;i<numGroups;i++){
+ diff[i] = sharedFractions[i * numOTUs + j1] - sharedFractions[i * numOTUs + j2];
+ mean += diff[i];
+ }
+ mean /= float(numGroups);
+
+ float variance = 0.0;
+ for(int i=0;i<numGroups;i++){
+ variance += (diff[i] - mean) * (diff[i] - mean);
+ }
+ variance /= (float)(numGroups-1);
+
+ tMatrix[j1 * numOTUs + j2] = variance;
+ tMatrix[j2 * numOTUs + j1] = tMatrix[j1 * numOTUs + j2];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getT_Matrix");
+ exit(1);
+ }
+
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getT_Vector(){
+ try {
+ tVector.assign(numOTUs, 0);
+
+ for(int j1=0;j1<numOTUs;j1++){
+ if (m->control_pressed) { return; }
+ for(int j2=0;j2<numOTUs;j2++){
+ tVector[j1] += tMatrix[j1 * numOTUs + j2];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getT_Vector");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getD_Matrix(){
+ try {
+ float d = numOTUs - 1.0;
+
+ dMatrix.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { return; }
+ dMatrix[i].resize(numOTUs, 1);
+ dMatrix[i][i] = d;
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getD_Matrix");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+vector<float> CalcSparcc::getBasisVariances(){
+ try {
+ LinearAlgebra LA;
+
+ vector<float> variances = LA.solveEquations(dMatrix, tVector);
+
+ for(int i=0;i<variances.size();i++){
+ if (m->control_pressed) { return variances; }
+ if(variances[i] < 0){ variances[i] = 1e-4; }
+ }
+
+ return variances;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getBasisVariances");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+vector<vector<float> > CalcSparcc::getBasisCorrelations(vector<float> basisVariance){
+ try {
+ vector<vector<float> > rho(numOTUs);
+ for(int i=0;i<numOTUs;i++){ rho[i].resize(numOTUs, 0); }
+
+ for(int i=0;i<numOTUs;i++){
+ float var_i = basisVariance[i];
+ float sqrt_var_i = sqrt(var_i);
+
+ rho[i][i] = 1.00;
+
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return rho; }
+ float var_j = basisVariance[j];
+
+ rho[i][j] = (var_i + var_j - tMatrix[i * numOTUs + j]) / (2.0 * sqrt_var_i * sqrt(var_j));
+ if(rho[i][j] > 1.0) { rho[i][j] = 1.0; }
+ else if(rho[i][j] < -1.0) { rho[i][j] = -1.0; }
+
+ rho[j][i] = rho[i][j];
+
+ }
+ }
+
+ return rho;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getBasisCorrelations");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+float CalcSparcc::getExcludedPairs(vector<vector<float> > rho, int& maxRow, int& maxColumn){
+ try {
+ float maxRho = 0;
+ maxRow = -1;
+ maxColumn = -1;
+
+ for(int i=0;i<numOTUs;i++){
+
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return maxRho; }
+ float tester = abs(rho[i][j]);
+
+ if(tester > maxRho && excluded[i][j] != 1){
+ maxRho = tester;
+ maxRow = i;
+ maxColumn = j;
+ }
+ }
+
+ }
+
+ return maxRho;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getExcludedPairs");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::excludeValues(int excludeRow, int excludeColumn){
+ try {
+ tVector[excludeRow] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+ tVector[excludeColumn] -= tMatrix[excludeRow * numOTUs + excludeColumn];
+
+ dMatrix[excludeRow][excludeColumn] = 0;
+ dMatrix[excludeColumn][excludeRow] = 0;
+ dMatrix[excludeRow][excludeRow]--;
+ dMatrix[excludeColumn][excludeColumn]--;
+
+ excluded[excludeRow][excludeColumn] = 1;
+ excluded[excludeColumn][excludeRow] = 1;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "excludeValues");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
+
+void CalcSparcc::getMedian(vector<vector<vector<float> > > allCorrelations){
+ try {
+ int numSamples = (int)allCorrelations.size();
+ median.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){ median[i].assign(numOTUs, 1); }
+
+ vector<float> hold(numSamples);
+
+ for(int i=0;i<numOTUs;i++){
+ for(int j=0;j<i;j++){
+ if (m->control_pressed) { return; }
+
+ for(int k=0;k<numSamples;k++){
+ hold[k] = allCorrelations[k][i][j];
+ }
+
+ sort(hold.begin(), hold.end());
+ median[i][j] = hold[int(numSamples * 0.5)];
+ median[j][i] = median[i][j];
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "CalcSparcc", "getMedian");
+ exit(1);
+ }
+}
+
+/**************************************************************************************************/
--- /dev/null
+
+#ifndef PDSSparCC_runSparcc_h
+#define PDSSparCC_runSparcc_h
+
+//
+// runSparcc.h
+// PDSSparCC
+//
+// Created by Patrick Schloss on 10/31/12.
+// Copyright (c) 2012 University of Michigan. All rights reserved.
+//
+
+/**************************************************************************************************/
+
+//#include "sparcc.h"
+#include "randomnumber.h"
+#include "mothurout.h"
+
+/**************************************************************************************************/
+
+class CalcSparcc {
+
+public:
+ CalcSparcc(vector<vector<float> >, int, int, string);
+ vector<vector<float> > getRho() { return median; }
+private:
+ MothurOut* m;
+ void addPseudoCount(vector<vector<float> >&);
+ vector<float> getLogFractions(vector<vector<float> >, string);
+ void getT_Matrix(vector<float>);
+
+
+ void getT_Vector();
+ void getD_Matrix();
+ vector<float> getBasisVariances();
+ vector<vector<float> > getBasisCorrelations(vector<float>);
+ float getExcludedPairs(vector<vector<float> >, int&, int&);
+ void excludeValues(int, int);
+ void getMedian(vector<vector<vector<float> > >);
+
+ vector<float> tMatrix;
+
+ vector<vector<float> > dMatrix;
+ vector<float> tVector;
+ vector<vector<int> > excluded;
+ vector<vector<float> > median;
+
+ int numOTUs;
+ int numGroups;
+ string normalizationMethod;
+
+ RandomNumberGenerator RNG;
+};
+
+#endif
+
+/**************************************************************************************************/
if (itLabel->first == -1) { thisLabel = "unique"; }
else { thisLabel = toString(itLabel->first, length-1); }
- outList << thisLabel << '\t' << itLabel->second << '\t';
+ //outList << thisLabel << '\t' << itLabel->second << '\t';
RAbundVector* rabund = NULL;
+ ListVector completeList;
+ completeList.setLabel(thisLabel);
+
if (countfile == "") {
rabund = new RAbundVector();
rabund->setLabel(thisLabel);
//add in singletons
if (listSingle != NULL) {
for (int j = 0; j < listSingle->getNumBins(); j++) {
- outList << listSingle->get(j) << '\t';
+ //outList << listSingle->get(j) << '\t';
+ completeList.push_back(listSingle->get(j));
if (countfile == "") { rabund->push_back(m->getNumNames(listSingle->get(j))); }
}
}
if (list == NULL) { m->mothurOut("Error merging listvectors in file " + listNames[k]); m->mothurOutEndLine(); }
else {
for (int j = 0; j < list->getNumBins(); j++) {
- outList << list->get(j) << '\t';
+ //outList << list->get(j) << '\t';
+ completeList.push_back(list->get(j));
if (countfile == "") { rabund->push_back(m->getNumNames(list->get(j))); }
}
delete list;
sabund.print(outSabund);
rabund->print(outRabund);
}
- outList << endl;
+ //outList << endl;
+ completeList.print(outList);
if (rabund != NULL) { delete rabund; }
}
#include "removedistscommand.h"
#include "mergetaxsummarycommand.h"
#include "getmetacommunitycommand.h"
+#include "sparcccommand.h"
+#include "makelookupcommand.h"
/*******************************************************/
commands["remove.dists"] = "remove.dists";
commands["merge.taxsummary"] = "merge.taxsummary";
commands["get.metacommunity"] = "get.metacommunity";
+ commands["sparcc"] = "sparcc";
+ commands["make.lookup"] = "make.lookup";
}
else if(commandName == "remove.dists") { command = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { command = new MergeTaxSummaryCommand(optionString); }
else if(commandName == "get.metacommunity") { command = new GetMetaCommunityCommand(optionString); }
+ else if(commandName == "sparcc") { command = new SparccCommand(optionString); }
+ else if(commandName == "make.lookup") { command = new MakeLookupCommand(optionString); }
else { command = new NoCommand(optionString); }
return command;
else if(commandName == "remove.dists") { pipecommand = new RemoveDistsCommand(optionString); }
else if(commandName == "merge.taxsummary") { pipecommand = new MergeTaxSummaryCommand(optionString); }
else if(commandName == "get.metacommunity") { pipecommand = new GetMetaCommunityCommand(optionString); }
+ else if(commandName == "sparcc") { pipecommand = new SparccCommand(optionString); }
+ else if(commandName == "make.lookup") { pipecommand = new MakeLookupCommand(optionString); }
else { pipecommand = new NoCommand(optionString); }
return pipecommand;
else if(commandName == "remove.dists") { shellcommand = new RemoveDistsCommand(); }
else if(commandName == "merge.taxsummary") { shellcommand = new MergeTaxSummaryCommand(); }
else if(commandName == "get.metacommunity") { shellcommand = new GetMetaCommunityCommand(); }
+ else if(commandName == "sparcc") { shellcommand = new SparccCommand(); }
+ else if(commandName == "make.lookup") { shellcommand = new MakeLookupCommand(); }
else { shellcommand = new NoCommand(); }
return shellcommand;
vector<string> CreateDatabaseCommand::setParameters(){
try {
CommandParameter pfasta("repfasta", "InputTypes", "", "", "none", "none", "none","database",false,true,true); parameters.push_back(pfasta);
- CommandParameter pname("repname", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pname);
+ CommandParameter pname("repname", "InputTypes", "", "", "NameCount", "NameCount", "none","",false,false,true); parameters.push_back(pname);
+ CommandParameter pcount("count", "InputTypes", "", "", "NameCount-CountGroup", "NameCount", "none","",false,false,true); parameters.push_back(pcount);
+ CommandParameter pgroup("group", "InputTypes", "", "", "CountGroup", "none", "none","",false,false,true); parameters.push_back(pgroup);
CommandParameter pcontaxonomy("contaxonomy", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(pcontaxonomy);
CommandParameter plist("list", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(plist);
CommandParameter pshared("shared", "InputTypes", "", "", "ListShared", "ListShared", "none","",false,false,true); parameters.push_back(pshared);
- CommandParameter pgroup("group", "InputTypes", "", "", "none", "none", "none","",false,false); parameters.push_back(pgroup);
CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
string CreateDatabaseCommand::getHelpString(){
try {
string helpString = "";
- helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, and creates a database file.\n";
- helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group and label. List, repfasta, repnames, and contaxonomy are required.\n";
+ helpString += "The create.database command reads a list file or a shared file, *.cons.taxonomy, *.rep.fasta, *.rep.names and optional groupfile, or count file and creates a database file.\n";
+ helpString += "The create.database command parameters are repfasta, list, shared, repname, contaxonomy, group, count and label. List, repfasta, repnames or count, and contaxonomy are required.\n";
helpString += "The repfasta file is fasta file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
helpString += "The repname file is the name file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, name=yourNameFile).\n";
- helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile).\n";
+ helpString += "The count file is the count file outputted by get.oturep(fasta=yourFastaFile, list=yourListfile, column=yourDistFile, count=yourCountFile). If it includes group info, mothur will give you the abundance breakdown by group. \n";
+ helpString += "The contaxonomy file is the taxonomy file outputted by classify.otu(list=yourListfile, taxonomy=yourTaxonomyFile, name=yourNameFile).\n";
helpString += "The group file is optional and will just give you the abundance breakdown by group.\n";
helpString += "The label parameter allows you to specify a label to be used from your listfile.\n";
helpString += "NOTE: Make SURE the repfasta, repnames and contaxonomy are for the same label as the listfile.\n";
helpString += "The create.database command should be in the following format: \n";
helpString += "create.database(repfasta=yourFastaFileFromGetOTURep, repname=yourNameFileFromGetOTURep, contaxonomy=yourConTaxFileFromClassifyOTU, list=yourListFile) \n";
- helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, name=final.an.0.03.rep.names, list=fina.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
+ helpString += "Example: create.database(repfasta=final.an.0.03.rep.fasta, repname=final.an.0.03.rep.names, list=final.an.list, label=0.03, contaxonomy=final.an.0.03.cons.taxonomy) \n";
helpString += "Note: No spaces between parameter labels (i.e. repfasta), '=' and parameters (i.e.yourFastaFileFromGetOTURep).\n";
return helpString;
}
if (path == "") { parameters["group"] = inputDir + it->second; }
}
+ it = parameters.find("count");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["count"] = inputDir + it->second; }
+ }
+
it = parameters.find("shared");
//user has given a template file
if(it != parameters.end()){
else if (repfastafile == "not open") { repfastafile = ""; abort = true; }
repnamesfile = validParameter.validFile(parameters, "repname", true);
- if (repnamesfile == "not found") { //if there is a current list file, use it
- repnamesfile = ""; m->mothurOut("The repnames parameter is required, aborting."); m->mothurOutEndLine(); abort = true;
- }
+ if (repnamesfile == "not found") { repnamesfile = ""; }
else if (repnamesfile == "not open") { repnamesfile = ""; abort = true; }
+
+ countfile = validParameter.validFile(parameters, "count", true);
+ if (countfile == "not found") { countfile = ""; }
+ else if (countfile == "not open") { countfile = ""; abort = true; }
+
+ if ((countfile == "") && (repnamesfile == "")) {
+ //if there is a current name file, use it, else look for current count file
+ string repnamesfile = m->getNameFile();
+ if (repnamesfile != "") { m->mothurOut("Using " + repnamesfile + " as input file for the repname parameter."); m->mothurOutEndLine(); }
+ else {
+ countfile = m->getCountTableFile();
+ if (countfile != "") { m->mothurOut("Using " + countfile + " as input file for the count parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("[ERROR]: You must provide a count or repname file."); m->mothurOutEndLine(); abort = true; }
+ }
+ }
groupfile = validParameter.validFile(parameters, "group", true);
if (groupfile == "not open") { groupfile = ""; abort = true; }
//names redundants to uniques. backwards to how we normally do it, but each bin is the list file will be a key entry in the map.
map<string, string> repNames;
- int numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
+ map<string, int> nameMap;
+ int numUniqueNamesFile = 0;
+ CountTable ct;
+ if (countfile == "") {
+ numUniqueNamesFile = m->readNames(repnamesfile, repNames, 1);
+ //the repnames file does not have the same order as the list file bins so we need to sort and reassemble for the search below
+ map<string, string> tempRepNames;
+ for (map<string, string>::iterator it = repNames.begin(); it != repNames.end();) {
+ string bin = it->first;
+ vector<string> temp;
+ m->splitAtChar(bin, temp, ',');
+ sort(temp.begin(), temp.end());
+ bin = "";
+ for (int i = 0; i < temp.size()-1; i++) {
+ bin += temp[i] + ',';
+ }
+ bin += temp[temp.size()-1];
+ tempRepNames[bin] = it->second;
+ repNames.erase(it++);
+ }
+ repNames = tempRepNames;
+ }else {
+ ct.readTable(countfile, true);
+ numUniqueNamesFile = ct.getNumUniqueSeqs();
+ nameMap = ct.getNameMap();
+ }
//are there the same number of otus in the fasta and name files
if (repOtusSizes.size() != numUniqueNamesFile) { m->mothurOut("[ERROR]: you have " + toString(numUniqueNamesFile) + " unique seqs in your repname file, but " + toString(repOtusSizes.size()) + " seqs in your repfasta file. These should match.\n"); m->control_pressed = true; }
if (groupfile != "") {
header = "OTUNumber\t";
for (int i = 0; i < groupmap->getNamesOfGroups().size(); i++) { header += (groupmap->getNamesOfGroups())[i] + '\t'; }
+ }else if (countfile != "") {
+ if (ct.hasGroupInfo()) {
+ header = "OTUNumber\t";
+ for (int i = 0; i < ct.getNamesOfGroups().size(); i++) { header += (ct.getNamesOfGroups())[i] + '\t'; }
+ }
}
header += "repSeqName\trepSeq\tOTUConTaxonomy";
out << header << endl;
vector<string> binNames;
string bin = list->get(i);
-
- map<string, string>::iterator it = repNames.find(bin);
- if (it == repNames.end()) {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
- }
-
m->splitAtComma(bin, binNames);
- //sanity check
- if (binNames.size() != classifyOtuSizes[i]) {
- m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ string seqRepName = "";
+ int numSeqsRep = 0;
+
+ if (countfile == "") {
+ sort(binNames.begin(), binNames.end());
+ bin = "";
+ for (int i = 0; i < binNames.size()-1; i++) {
+ bin += binNames[i] + ',';
+ }
+ bin += binNames[binNames.size()-1];
+ map<string, string>::iterator it = repNames.find(bin);
+
+ if (it == repNames.end()) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the repnames file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }else { seqRepName = it->second; numSeqsRep = binNames.size(); }
+
+ //sanity check
+ if (binNames.size() != classifyOtuSizes[i]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(binNames.size()) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
+ }else {
+ //find rep sequence in bin
+ for (int j = 0; j < binNames.size(); j++) {
+ map<string, int>::iterator itNameMap = nameMap.find(binNames[j]); //if you are in the counttable you must be the rep. because get.oturep with a countfile only includes the rep sequences in the rep.count file.
+ if (itNameMap != nameMap.end()) {
+ seqRepName = itNameMap->first;
+ numSeqsRep = itNameMap->second;
+ j += binNames.size();
+ }
+ }
+
+ if (seqRepName == "") {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " is not in the count file. Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
+
+ if (numSeqsRep != classifyOtuSizes[i]) {
+ m->mothurOut("[ERROR: OTU " + otuLabels[i] + " contains " + toString(numSeqsRep) + " sequence, but the rep and taxonomy files indicated this OTU should have " + toString(classifyOtuSizes[i]) + ". Make sure you are using files for the same distance.\n"); m->control_pressed = true; break;
+ }
}
//output abundances
for (int j = 0; j < groupmap->getNamesOfGroups().size(); j++) { out << counts[(groupmap->getNamesOfGroups())[j]] << '\t'; }
if (error) { m->control_pressed = true; }
- }else { out << binNames.size() << '\t'; }
+ }else if (countfile != "") {
+ if (ct.hasGroupInfo()) {
+ vector<int> groupCounts = ct.getGroupCounts(seqRepName);
+ for (int j = 0; j < groupCounts.size(); j++) { out << groupCounts[j] << '\t'; }
+ }else { out << numSeqsRep << '\t'; }
+ }else { out << numSeqsRep << '\t'; }
//output repSeq
- out << it->second << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
+ out << seqRepName << '\t' << seqs[i].getAligned() << '\t' << taxonomies[i] << endl;
}
ifstream in;
m->openInputFile(repfastafile, in);
+ set<int> sanity;
while (!in.eof()) {
if (m->control_pressed) { break; }
//the binInfo should look like - binNumber|size ie. 1|200 if it is binNumber|size|group then the user gave us the wrong repfasta file
vector<string> info;
m->splitAtChar(binInfo, info, '|');
- if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;}
+ //if (info.size() != 2) { m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;}
int size = 0;
m->mothurConvert(info[1], size);
+ int binNumber = 0;
+ string temp = "";
+ for (int i = 0; i < info[0].size(); i++) { if (isspace(info[0][i])) {;}else{temp +=info[0][i]; } }
+ m->mothurConvert(temp, binNumber);
+ set<int>::iterator it = sanity.find(binNumber);
+ if (it != sanity.end()) {
+ m->mothurOut("[ERROR]: your repfasta file is not the right format. The create database command is designed to be used with the output from get.oturep. When running get.oturep you can not use a group file, because mothur is only expecting one representative sequence per OTU and when you use a group file with get.oturep a representative is found for each group.\n"); m->control_pressed = true; break;
+ }else { sanity.insert(binNumber); }
+
sizes.push_back(size);
seqs.push_back(seq);
}
private:
bool abort;
- string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir;
+ string sharedfile, listfile, groupfile, repfastafile, repnamesfile, contaxonomyfile, label, outputDir, countfile;
vector<string> outputNames;
flowFile >> name;
if (name.length() != 0) {
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
}else{ m->mothurOut("Error in reading your flowfile, at position " + toString(flowFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
return name;
if ((sharedfile != "") || (listfile != "")) {
label = validParameter.validFile(parameters, "label", false);
- if (label == "not found") { label = ""; m->mothurOut("You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); label=""; }
+ if (label == "not found") { label = ""; m->mothurOut("[WARNING]: You did not provide a label, I will use the first label in your inputfile."); m->mothurOutEndLine(); }
}
if (countfile == "") {
indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
- pValues = getPValues(groupings, randomGroupingsMap, lookup.size(), indicatorValues);
+ pValues = getPValues(groupings, lookup.size(), indicatorValues);
}else {
vector< vector<SharedRAbundFloatVector*> > groupings;
set<string> groupsAlreadyAdded;
indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
- pValues = getPValues(groupings, randomGroupingsMap, lookupFloat.size(), indicatorValues);
+ pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
}
if (m->control_pressed) { out.close(); return 0; }
indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
- pValues = getPValues(groupings, randomGroupingsMap, lookup.size(), indicatorValues);
+ pValues = getPValues(groupings, lookup.size(), indicatorValues);
}else {
vector< vector<SharedRAbundFloatVector*> > groupings;
indicatorValues = getValues(groupings, indicatorGroups, randomGroupingsMap);
- pValues = getPValues(groupings, randomGroupingsMap, lookupFloat.size(), indicatorValues);
+ pValues = getPValues(groupings, lookupFloat.size(), indicatorValues);
}
if (m->control_pressed) { out.close(); return 0; }
}
}
//**********************************************************************************************************************
-vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues, int numIters){
+vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
try {
vector<float> pvalues;
pvalues.resize(indicatorValues.size(), 0);
for(int i=0;i<numIters;i++){
if (m->control_pressed) { break; }
- groupingsMap = randomizeGroupings(groupings, num);
+ map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
for (int j = 0; j < indicatorValues.size(); j++) {
}
}
//**********************************************************************************************************************
-vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues){
+vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundFloatVector*> >& groupings, int num, vector<float> indicatorValues){
try {
vector<float> pvalues;
-
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
if(processors == 1){
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+ pvalues = driver(groupings, num, indicatorValues, iters);
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
}else{
+ //divide iters between processors
+ vector<int> procIters;
+ int numItersPerProcessor = iters / processors;
+
+ //divide iters between processes
+ for (int h = 0; h < processors; h++) {
+ if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+ procIters.push_back(numItersPerProcessor);
+ }
+
+ vector<int> processIDS;
+ int process = 1;
- //divide iters between processors
- int numItersPerProcessor = iters / processors;
-
- vector<int> processIDS;
- int process = 1;
- int num = 0;
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+ pvalues = driver(groupings, num, indicatorValues, procIters[process]);
//pass pvalues to parent
ofstream out;
}
//do my part
- //special case for last processor in case it doesn't divide evenly
- numItersPerProcessor = iters - ((processors-1) * numItersPerProcessor);
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+ pvalues = driver(groupings, num, indicatorValues, procIters[0]);
//force parent to wait until all the processes are done
for (int i=0;i<processIDS.size();i++) {
}
in.close(); m->mothurRemove(tempFile);
}
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
- }
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
#else
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+
+ //fill in functions
+ vector<indicatorData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy of lookup so we don't get access violations
+ vector< vector<SharedRAbundFloatVector*> > newGroupings;
+
+ for (int k = 0; k < groupings.size(); k++) {
+ vector<SharedRAbundFloatVector*> newLookup;
+ for (int l = 0; l < groupings[k].size(); l++) {
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel(groupings[k][l]->getLabel());
+ temp->setGroup(groupings[k][l]->getGroup());
+ newLookup.push_back(temp);
+ }
+ newGroupings.push_back(newLookup);
+ }
+
+ //for each bin
+ for (int l = 0; l < groupings.size(); l++) {
+ for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u]; } } return pvalues; }
+
+ for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back(groupings[l][j]->getAbundance(k), groupings[l][j]->getGroup()); }
+ }
+ }
+
+ vector<float> copyIValues = indicatorValues;
+
+ indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
+ pDataArray.push_back(temp);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //do my part
+ pvalues = driver(groupings, num, indicatorValues, procIters[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j]; }
+
+ for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
+ for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
#endif
+ }
+
return pvalues;
}
//**********************************************************************************************************************
//same as above, just data type difference
-vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues, int numIters){
+vector<float> IndicatorCommand::driver(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues, int numIters){
try {
vector<float> pvalues;
pvalues.resize(indicatorValues.size(), 0);
for(int i=0;i<numIters;i++){
if (m->control_pressed) { break; }
- groupingsMap = randomizeGroupings(groupings, num);
+ map< vector<int>, vector<int> > groupingsMap = randomizeGroupings(groupings, num);
vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, groupingsMap);
for (int j = 0; j < indicatorValues.size(); j++) {
}
//**********************************************************************************************************************
//same as above, just data type difference
-vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >& groupings, map< vector<int>, vector<int> > groupingsMap, int num, vector<float> indicatorValues){
+vector<float> IndicatorCommand::getPValues(vector< vector<SharedRAbundVector*> >& groupings, int num, vector<float> indicatorValues){
try {
vector<float> pvalues;
-
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
if(processors == 1){
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+ pvalues = driver(groupings, num, indicatorValues, iters);
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
}else{
+ //divide iters between processors
+ vector<int> procIters;
+ int numItersPerProcessor = iters / processors;
+
+ //divide iters between processes
+ for (int h = 0; h < processors; h++) {
+ if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+ procIters.push_back(numItersPerProcessor);
+ }
+
+ vector<int> processIDS;
+ int process = 1;
- //divide iters between processors
- int numItersPerProcessor = iters / processors;
-
- vector<int> processIDS;
- int process = 1;
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
process++;
}else if (pid == 0){
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+ pvalues = driver(groupings, num, indicatorValues, procIters[process]);
//pass pvalues to parent
ofstream out;
out.close();
exit(0);
- }else {
- m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
exit(0);
}
}
//do my part
- //special case for last processor in case it doesn't divide evenly
- numItersPerProcessor = iters - ((processors-1) * numItersPerProcessor);
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, numItersPerProcessor);
+ pvalues = driver(groupings, num, indicatorValues, procIters[0]);
//force parent to wait until all the processes are done
- for (int i=0;i<processIDS.size();i++) {
+ for (int i=0;i<processIDS.size();i++) {
int temp = processIDS[i];
wait(&temp);
}
- //combine results
+ //combine results
for (int i = 0; i < processIDS.size(); i++) {
ifstream in;
string tempFile = toString(processIDS[i]) + ".pvalues.temp";
m->openInputFile(tempFile, in);
////// to do ///////////
- int numTemp; numTemp = 0;
+ int numTemp; numTemp = 0;
for (int j = 0; j < pvalues.size(); j++) {
in >> numTemp; m->gobble(in);
pvalues[j] += numTemp;
}
in.close(); m->mothurRemove(tempFile);
}
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
- }
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
#else
- pvalues = driver(groupings, groupingsMap, num, indicatorValues, iters);
- for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
+
+ //fill in functions
+ vector<indicatorData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy of lookup so we don't get access violations
+ vector< vector<SharedRAbundFloatVector*> > newGroupings;
+
+ for (int k = 0; k < groupings.size(); k++) {
+ vector<SharedRAbundFloatVector*> newLookup;
+ for (int l = 0; l < groupings[k].size(); l++) {
+ SharedRAbundFloatVector* temp = new SharedRAbundFloatVector();
+ temp->setLabel(groupings[k][l]->getLabel());
+ temp->setGroup(groupings[k][l]->getGroup());
+ newLookup.push_back(temp);
+ }
+ newGroupings.push_back(newLookup);
+ }
+
+ //for each bin
+ for (int l = 0; l < groupings.size(); l++) {
+ for (int k = 0; k < groupings[l][0]->getNumBins(); k++) {
+ if (m->control_pressed) { for (int j = 0; j < newGroupings.size(); j++) { for (int u = 0; u < newGroupings[j].size(); u++) { delete newGroupings[j][u]; } } return pvalues; }
+
+ for (int j = 0; j < groupings[l].size(); j++) { newGroupings[l][j]->push_back((float)(groupings[l][j]->getAbundance(k)), groupings[l][j]->getGroup()); }
+ }
+ }
+
+ vector<float> copyIValues = indicatorValues;
+
+ indicatorData* temp = new indicatorData(m, procIters[i], newGroupings, num, copyIValues);
+ pDataArray.push_back(temp);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyIndicatorThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //do my part
+ pvalues = driver(groupings, num, indicatorValues, procIters[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->pvalues.size(); j++) { pvalues[j] += pDataArray[i]->pvalues[j]; }
+
+ for (int l = 0; l < pDataArray[i]->groupings.size(); l++) {
+ for (int j = 0; j < pDataArray[i]->groupings[l].size(); j++) { delete pDataArray[i]->groupings[l][j]; }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+ for (int i = 0; i < pvalues.size(); i++) { pvalues[i] /= (double)iters; }
#endif
+ }
+
return pvalues;
}
catch(exception& e) {
- m->errorOut(e, "IndicatorCommand", "getPValues");
+ m->errorOut(e, "IndicatorCommand", "getPValues");
exit(1);
}
}
map< vector<int>, vector<int> > randomizeGroupings(vector< vector<SharedRAbundVector*> >&, int);
map< vector<int>, vector<int> > randomizeGroupings(vector< vector<SharedRAbundFloatVector*> >&, int);
- vector<float> driver(vector< vector<SharedRAbundFloatVector*> >&, map< vector<int>, vector<int> >, int, vector<float>, int);
- vector<float> driver(vector< vector<SharedRAbundVector*> >&, map< vector<int>, vector<int> >, int, vector<float>, int);
+ vector<float> driver(vector< vector<SharedRAbundFloatVector*> >&, int, vector<float>, int);
+ vector<float> driver(vector< vector<SharedRAbundVector*> >&, int, vector<float>, int);
- vector<float> getPValues(vector< vector<SharedRAbundFloatVector*> >&, map< vector<int>, vector<int> >, int, vector<float>);
- vector<float> getPValues(vector< vector<SharedRAbundVector*> >&, map< vector<int>, vector<int> >, int, vector<float>);
+ vector<float> getPValues(vector< vector<SharedRAbundFloatVector*> >&, int, vector<float>);
+ vector<float> getPValues(vector< vector<SharedRAbundVector*> >&, int, vector<float>);
};
+/**************************************************************************************************/
+
+struct indicatorData {
+ vector< vector<SharedRAbundFloatVector*> > groupings;
+ MothurOut* m;
+ int iters, num;
+ vector<float> indicatorValues;
+ vector<float> pvalues;
+
+ indicatorData(){}
+ indicatorData(MothurOut* mout, int it, vector< vector<SharedRAbundFloatVector*> > ng, int n, vector<float> iv) {
+ m = mout;
+ iters = it;
+ groupings = ng;
+ indicatorValues = iv;
+ num = n;
+ }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyIndicatorThreadFunction(LPVOID lpParam){
+ indicatorData* pDataArray;
+ pDataArray = (indicatorData*)lpParam;
+
+ try {
+
+ pDataArray->pvalues.resize(pDataArray->indicatorValues.size(), 0);
+
+ for(int i=0;i<pDataArray->iters;i++){
+ if (pDataArray->m->control_pressed) { break; }
+
+ //groupingsMap = randomizeGroupings(groupings, num);
+ ///////////////////////////////////////////////////////////////////////
+ map< vector<int>, vector<int> > randomGroupings;
+
+ for (int j = 0; j < pDataArray->num; j++) {
+
+ //get random groups to swap to switch with
+ //generate random int between 0 and groupings.size()-1
+ int z = pDataArray->m->getRandomIndex(pDataArray->groupings.size()-1);
+ int x = pDataArray->m->getRandomIndex(pDataArray->groupings.size()-1);
+ int a = pDataArray->m->getRandomIndex(pDataArray->groupings[z].size()-1);
+ int b = pDataArray->m->getRandomIndex(pDataArray->groupings[x].size()-1);
+ //cout << i << '\t' << z << '\t' << x << '\t' << a << '\t' << b << endl;
+
+ vector<int> from;
+ vector<int> to;
+
+ from.push_back(z); from.push_back(a);
+ to.push_back(x); to.push_back(b);
+
+ randomGroupings[from] = to;
+ }
+ ///////////////////////////////////////////////////////////////////////
+
+ //vector<float> randomIndicatorValues = getValues(groupings, notUsedGroupings, randomGroupings);
+ ///////////////////////////////////////////////////////////////////////
+ vector<float> randomIndicatorValues;
+ map< vector<int>, vector<int> >::iterator it;
+
+ //for each otu
+ for (int i = 0; i < pDataArray->groupings[0][0]->getNumBins(); i++) {
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ vector<float> terms;
+ float AijDenominator = 0.0;
+ vector<float> Bij;
+
+ //get overall abundance of each grouping
+ for (int j = 0; j < pDataArray->groupings.size(); j++) {
+
+ float totalAbund = 0;
+ int numNotZero = 0;
+ for (int k = 0; k < pDataArray->groupings[j].size(); k++) {
+ vector<int> temp; temp.push_back(j); temp.push_back(k);
+ it = randomGroupings.find(temp);
+
+ if (it == randomGroupings.end()) { //this one didnt get moved
+ totalAbund += pDataArray->groupings[j][k]->getAbundance(i);
+ if (pDataArray->groupings[j][k]->getAbundance(i) != 0.0) { numNotZero++; }
+ }else {
+ totalAbund += pDataArray->groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i);
+ if (pDataArray->groupings[(it->second)[0]][(it->second)[1]]->getAbundance(i) != 0.0) { numNotZero++; }
+ }
+
+ }
+
+ //mean abundance
+ float Aij = (totalAbund / (float) pDataArray->groupings[j].size());
+ terms.push_back(Aij);
+
+ //percentage of sites represented
+ Bij.push_back(numNotZero / (float) pDataArray->groupings[j].size());
+
+ AijDenominator += Aij;
+ }
+
+ float maxIndVal = 0.0;
+ for (int j = 0; j < terms.size(); j++) {
+ float thisAij = (terms[j] / AijDenominator); //relative abundance
+ float thisValue = thisAij * Bij[j] * 100.0;
+
+ //save largest
+ if (thisValue > maxIndVal) { maxIndVal = thisValue; }
+ }
+
+ randomIndicatorValues.push_back(maxIndVal);
+ }
+
+ ///////////////////////////////////////////////////////////////////////
+
+ for (int j = 0; j < pDataArray->indicatorValues.size(); j++) {
+ if (randomIndicatorValues[j] >= pDataArray->indicatorValues[j]) { pDataArray->pvalues[j]++; }
+ }
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "IndicatorCommand", "MyIndicatorThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
#endif
exit(1);
}
}
+/*********************************************************************************************************************************/
+vector<float> LinearAlgebra::solveEquations(vector<vector<float> > A, vector<float> b){
+ try {
+ int length = (int)b.size();
+ vector<double> x(length, 0);
+ vector<int> index(length);
+ for(int i=0;i<length;i++){ index[i] = i; }
+ float d;
+
+ ludcmp(A, index, d); if (m->control_pressed) { return b; }
+ lubksb(A, index, b);
+
+ return b;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "solveEquations");
+ exit(1);
+ }
+}
/*********************************************************************************************************************************/
exit(1);
}
}
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::ludcmp(vector<vector<float> >& A, vector<int>& index, float& d){
+ try {
+ double tiny = 1e-20;
+
+ int n = (int)A.size();
+ vector<float> vv(n, 0.0);
+ double temp;
+ int imax;
+
+ d = 1.0;
+
+ for(int i=0;i<n;i++){
+ float big = 0.0;
+ for(int j=0;j<n;j++){ if((temp=fabs(A[i][j])) > big ) big=temp; }
+ if(big==0.0){ m->mothurOut("Singular matrix in routine ludcmp\n"); }
+ vv[i] = 1.0/big;
+ }
+
+ for(int j=0;j<n;j++){
+ if (m->control_pressed) { break; }
+ for(int i=0;i<j;i++){
+ float sum = A[i][j];
+ for(int k=0;k<i;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ }
+
+ float big = 0.0;
+ for(int i=j;i<n;i++){
+ float sum = A[i][j];
+ for(int k=0;k<j;k++){ sum -= A[i][k] * A[k][j]; }
+ A[i][j] = sum;
+ float dum;
+ if((dum = vv[i] * fabs(sum)) >= big){
+ big = dum;
+ imax = i;
+ }
+ }
+ if(j != imax){
+ for(int k=0;k<n;k++){
+ float dum = A[imax][k];
+ A[imax][k] = A[j][k];
+ A[j][k] = dum;
+ }
+ d = -d;
+ vv[imax] = vv[j];
+ }
+ index[j] = imax;
+
+ if(A[j][j] == 0.0){ A[j][j] = tiny; }
+
+ if(j != n-1){
+ float dum = 1.0/A[j][j];
+ for(int i=j+1;i<n;i++){ A[i][j] *= dum; }
+ }
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "ludcmp");
+ exit(1);
+ }
+}
+
+/*********************************************************************************************************************************/
+
+void LinearAlgebra::lubksb(vector<vector<float> >& A, vector<int>& index, vector<float>& b){
+ try {
+ float total;
+ int n = (int)A.size();
+ int ii = 0;
+
+ for(int i=0;i<n;i++){
+ if (m->control_pressed) { break; }
+ int ip = index[i];
+ total = b[ip];
+ b[ip] = b[i];
+
+ if (ii != 0) {
+ for(int j=ii-1;j<i;j++){
+ total -= A[i][j] * b[j];
+ }
+ }
+ else if(total != 0){ ii = i+1; }
+ b[i] = total;
+ }
+ for(int i=n-1;i>=0;i--){
+ total = b[i];
+ for(int j=i+1;j<n;j++){ total -= A[i][j] * b[j]; }
+ b[i] = total / A[i][i];
+ }
+ }
+ catch(exception& e) {
+ m->errorOut(e, "LinearAlgebra", "lubksb");
+ exit(1);
+ }
+}
+
/*********************************************************************************************************************************/
double calcKendallSig(double, double); //length, coeff.
vector<double> solveEquations(vector<vector<double> >, vector<double>);
+ vector<float> solveEquations(vector<vector<float> >, vector<float>);
vector<vector<double> > getInverse(vector<vector<double> >);
void ludcmp(vector<vector<double> >&, vector<int>&, double&);
void lubksb(vector<vector<double> >&, vector<int>&, vector<double>&);
+
+ void ludcmp(vector<vector<float> >&, vector<int>&, float&);
+ void lubksb(vector<vector<float> >&, vector<int>&, vector<float>&);
};
CYGWIN_BUILD ?= no
USECOMPRESSION ?= no
MOTHUR_FILES="\"Enter_your_default_path_here\""
-RELEASE_DATE = "\"2/12/2013\""
-VERSION = "\"1.29.2\""
+RELEASE_DATE = "\"5/24/2013\""
+VERSION = "\"1.31.0\""
FORTAN_COMPILER = gfortran
FORTRAN_FLAGS =
--- /dev/null
+//
+// makelookupcommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 5/14/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "makelookupcommand.h"
+
+//**********************************************************************************************************************
+vector<string> MakeLookupCommand::setParameters(){
+ try {
+ CommandParameter ptemplate("reference", "InputTypes", "", "", "none", "none", "none","",false,true,true); parameters.push_back(ptemplate);
+ CommandParameter pflow("flow", "InputTypes", "", "", "none", "none", "none","lookup",false,true,true); parameters.push_back(pflow);
+ CommandParameter perrors("error", "InputTypes", "", "", "none", "none", "none","none",false,true,true); parameters.push_back(perrors);
+ CommandParameter pbarcode("barcode", "String", "", "AACCGTGTC", "", "", "","",false,false); parameters.push_back(pbarcode);
+ CommandParameter pkey("key", "String", "", "TCAG", "", "", "","",false,false); parameters.push_back(pkey);
+ CommandParameter pthreshold("threshold", "Number", "", "10000", "", "", "","",false,false); parameters.push_back(pthreshold);
+ CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The make.lookup command allows you to create custom lookup files for use with shhh.flows.\n";
+ helpString += "The make.lookup command parameters are: reference, flow, error, barcode, key, threshold and order.\n";
+ helpString += "The reference file needs to be in the same direction as the flow data and it must start with the forward primer sequence. It is required.\n";
+ helpString += "The flow parameter is used to provide the flow data. It is required.\n";
+ helpString += "The error parameter is used to provide the error summary. It is required.\n";
+ helpString += "The barcode parameter is used to provide the barcode sequence. Default=AACCGTGTC.\n";
+ helpString += "The key parameter is used to provide the key sequence. Default=TCAG.\n";
+ helpString += "The threshold parameter is ....Default=10000.\n";
+ helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
+ helpString += "The make.lookup should be in the following format: make.lookup(reference=HMP_MOCK.v53.fasta, flow=H3YD4Z101.mock3.flow_450.flow, error=H3YD4Z101.mock3.flow_450.error.summary, barcode=AACCTGGC)\n";
+ helpString += "new(...)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string MakeLookupCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "lookup") { pattern = "[filename],lookup"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["lookup"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+MakeLookupCommand::MakeLookupCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["lookup"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+ string path;
+ it = parameters.find("flow");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["flow"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("error");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["error"] = inputDir + it->second; }
+ }
+
+ it = parameters.find("reference");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["reference"] = inputDir + it->second; }
+ }
+
+ }
+
+ //check for parameters
+ errorFileName = validParameter.validFile(parameters, "error", true);
+ if (errorFileName == "not open") { errorFileName = ""; abort = true; }
+ else if (errorFileName == "not found") { errorFileName = ""; m->mothurOut("[ERROR]: error parameter is required."); m->mothurOutEndLine(); abort = true; }
+
+ flowFileName = validParameter.validFile(parameters, "flow", true);
+ if (flowFileName == "not open") { flowFileName = ""; abort = true; }
+ else if (flowFileName == "not found") { flowFileName = ""; m->mothurOut("[ERROR]: flow parameter is required."); m->mothurOutEndLine(); abort = true; }
+ else { m->setFlowFile(flowFileName); }
+
+ refFastaFileName = validParameter.validFile(parameters, "reference", true);
+ if (refFastaFileName == "not open") { abort = true; }
+ else if (refFastaFileName == "not found") { refFastaFileName = ""; m->mothurOut("[ERROR]: reference parameter is required."); m->mothurOutEndLine(); abort = true; }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(flowFileName); //if user entered a file with a path then preserve it
+ }
+
+ string temp = validParameter.validFile(parameters, "threshold", false); if (temp == "not found"){ temp = "10000"; }
+ m->mothurConvert(temp, thresholdCount);
+
+ barcodeSequence = validParameter.validFile(parameters, "barcode", false); if (barcodeSequence == "not found"){ barcodeSequence = "AACCGTGTC"; }
+
+ keySequence = validParameter.validFile(parameters, "key", false); if (keySequence == "not found"){ keySequence = "TCAG"; }
+
+ temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ else {
+ if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
+ else if(toupper(temp[0]) == 'B'){
+ flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ else if(toupper(temp[0]) == 'I'){
+ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
+ else {
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ }
+
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "MakeLookupCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+int MakeLookupCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ cout.setf(ios::fixed, ios::floatfield);
+ cout.setf(ios::showpoint);
+
+ double gapOpening = 10;
+ int maxHomoP = 101;
+ vector<vector<double> > penaltyMatrix; penaltyMatrix.resize(maxHomoP);
+ for(int i=0;i<maxHomoP;i++){
+ penaltyMatrix[i].resize(maxHomoP, 5);
+ penaltyMatrix[i][i] = 0;
+ }
+
+ //Create flows for reference sequences...
+ ifstream refFASTA;
+ m->openInputFile(refFastaFileName, refFASTA); // * open reference sequence file
+ map<string, vector<double> > refFlowgrams;
+
+ while(!refFASTA.eof()){
+ if (m->control_pressed) { refFASTA.close(); return 0; }
+
+ Sequence seq(refFASTA); m->gobble(refFASTA);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: seq = " + seq.getName() + ".\n"); }
+
+ string fullSequence = keySequence + barcodeSequence + seq.getAligned(); // * concatenate the keySequence, barcodeSequence, and
+ // referenceSequences
+ refFlowgrams[seq.getName()] = convertSeqToFlow(fullSequence, flowOrder); // * translate concatenated sequences into flowgram
+ }
+ refFASTA.close();
+
+ vector<vector<double> > lookupTable; lookupTable.resize(1000);
+ for(int i=0;i<1000;i++){
+ lookupTable[i].resize(11, 0);
+ }
+
+ if (m->debug) { m->mothurOut("[DEBUG]: here .\n"); }
+ //Loop through each sequence in the flow file and the error summary file.
+ ifstream flowFile;
+ m->openInputFile(flowFileName, flowFile);
+ int numFlows;
+ flowFile >> numFlows;
+
+ if (m->debug) { m->mothurOut("[DEBUG]: numflows = " + toString(numFlows) + ".\n"); }
+
+ ifstream errorFile;
+ m->openInputFile(errorFileName, errorFile);
+ m->getline(errorFile); //grab headers
+
+ string errorQuery, flowQuery, referenceName, dummy;
+ string chimera;
+ float intensity;
+
+ vector<double> std; std.resize(11, 0);
+
+ while(errorFile && flowFile){
+
+ if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+
+ // * if it's chimeric, chuck it
+ errorFile >> errorQuery >> referenceName;
+ for(int i=2;i<40;i++){
+ errorFile >> dummy;
+ }
+ errorFile >> chimera;
+
+
+ if(chimera == "2"){
+ m->getline(flowFile);
+ }
+ else{
+
+ flowFile >> flowQuery >> dummy;
+ if(flowQuery != errorQuery){ cout << flowQuery << " != " << errorQuery << endl; }
+
+ map<string, vector<double> >::iterator it = refFlowgrams.find(referenceName); // * compare sequence to its closest reference
+ if (it == refFlowgrams.end()) {
+ m->mothurOut("[WARNING]: missing reference flow " + referenceName + ", ignoring flow " + flowQuery + ".\n");
+ m->getline(flowFile); m->gobble(flowFile);
+ }else {
+ vector<double> refFlow = it->second;
+ vector<double> flowgram; flowgram.resize(numFlows);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: flowQuery = " + flowQuery + ".\t" + "refName " + referenceName+ ".\n"); }
+
+ for(int i=0;i<numFlows;i++){
+ flowFile >> intensity;
+ flowgram[i] = intensity;// (int)round(100 * intensity);
+ }
+ m->gobble(flowFile);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: before align.\n"); }
+
+ alignFlowGrams(flowgram, refFlow, gapOpening, penaltyMatrix, flowOrder);
+
+ if (m->debug) { m->mothurOut("[DEBUG]: after align.\n"); }
+
+ if (m->control_pressed) { errorFile.close(); flowFile.close(); return 0; }
+
+ for(int i=0;i<flowgram.size();i++){
+ int count = (int)round(100*flowgram[i]);
+ if(count > 1000){count = 999;}
+ if(abs(flowgram[i]-refFlow[i])<=0.50){
+ lookupTable[count][int(refFlow[i])]++; // * build table
+ std[int(refFlow[i])] += (100*refFlow[i]-count)*(100*refFlow[i]-count);
+ }
+ }
+ }
+
+ }
+ m->gobble(errorFile);
+ m->gobble(flowFile);
+ }
+ errorFile.close(); flowFile.close();
+
+
+ //get probabilities
+ vector<int> counts; counts.resize(11, 0);
+ int totalCount = 0;
+ for(int i=0;i<1000;i++){
+ for(int j=0;j<11;j++){
+ counts[j] += lookupTable[i][j];
+ totalCount += lookupTable[i][j];
+ }
+ }
+
+ int N = 11;
+ for(int i=0;i<11;i++){
+ if(counts[i] < thresholdCount){ N = i; break; } //bring back
+ std[i] = sqrt(std[i]/(double)(counts[i])); //bring back
+ }
+
+ regress(std, N); //bring back
+
+ if (m->control_pressed) { return 0; }
+
+ double minProbability = 0.1 / (double)totalCount;
+
+ //calculate the negative log probabilities of each intensity given the actual homopolymer length; impute with a guassian when counts are too low
+ double sqrtTwoPi = 2.50662827463;//pow(2.0 * 3.14159, 0.5);
+
+ for(int i=0;i<1000;i++){
+ if (m->control_pressed) { return 0; }
+
+ for(int j=0;j<N;j++){
+ if(lookupTable[i][j] == 0){
+ lookupTable[i][j] = 1; //bring back
+ }
+ lookupTable[i][j] = -log(lookupTable[i][j]/double(counts[j])); //bring back
+ }
+
+ for(int j=N;j<11;j++){ //bring back
+ double normalProbability = 1.0/((double)std[j] * sqrtTwoPi) * exp(-double(i - j*100)* double(i - j*100)/ double(2*std[j]*std[j]));
+ if(normalProbability > minProbability){
+ lookupTable[i][j] = -log(normalProbability);
+ }
+ else{
+ lookupTable[i][j] = -log(minProbability);
+ }
+ }
+ }
+
+
+ //calculate the probability of each homopolymer length
+ vector<double> negLogHomoProb; negLogHomoProb.resize(11, 0.00); //bring back
+ for(int i=0;i<N;i++){
+ negLogHomoProb[i] = -log(counts[i] / (double)totalCount);
+ }
+ regress(negLogHomoProb, N);
+
+ if (m->control_pressed) { return 0; }
+
+ //output data table. column one is the probability of each homopolymer length
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(flowFileName));
+ string outputFile = getOutputFileName("lookup",variables);
+ outputNames.push_back(outputFile); outputTypes["lookup"].push_back(outputFile);
+
+ ofstream lookupFile;
+ m->openOutputFile(outputFile, lookupFile);
+ lookupFile.precision(8);
+
+ for(int j=0;j<11;j++){
+ // lookupFile << counts[j];
+ lookupFile << showpoint << negLogHomoProb[j]; //bring back
+ for(int i=0;i<1000;i++){
+ lookupFile << '\t' << lookupTable[i][j];
+ }
+ lookupFile << endl;
+ }
+ lookupFile.close();
+
+ m->mothurOut("\nData for homopolymer lengths of " + toString(N) + " and longer were imputed for this analysis\n\n");
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "execute");
+ exit(1);
+ }
+}
+//******************************************************************************************************************************
+
+vector<double> MakeLookupCommand::convertSeqToFlow(string sequence, string order){
+ try {
+ int seqLength = (int)sequence.length();
+ int numFlows = (int)order.length();
+ vector<double> flowgram;
+
+ int orderIndex = 0;
+ int sequenceIndex = 0;
+
+ while(orderIndex < numFlows && sequenceIndex < seqLength){
+
+ if (m->control_pressed) { return flowgram; }
+
+ int homopolymerLength = 1;
+
+ char base = sequence[sequenceIndex];
+
+ while(base == sequence[sequenceIndex+1] && sequenceIndex < seqLength){
+ homopolymerLength++;
+ sequenceIndex++;
+ }
+
+ sequenceIndex++;
+
+ for(int i=orderIndex; i<orderIndex+numFlows;i++){
+ if(order[i%numFlows] == base){
+ //flowgram[i] = homopolymerLength;
+ orderIndex = i%numFlows;
+ break;
+ }else { flowgram.push_back(0); }
+ }
+
+ //flowgram[orderIndex] = homopolymerLength;
+ flowgram.push_back(homopolymerLength);
+
+ orderIndex++;
+ orderIndex = orderIndex % numFlows;
+ }
+
+ return flowgram;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "convertSeqToFlow");
+ exit(1);
+ }
+}
+//******************************************************************************************************************************
+
+int MakeLookupCommand::alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder){
+ try {
+ int numQueryFlows = (int)flowgram.size();
+ int numRefFlows = (int)refFlow.size();
+
+ //cout << numQueryFlows << '\t' << numRefFlows << endl;
+
+ vector<vector<double> > scoreMatrix; scoreMatrix.resize(numQueryFlows+1);
+ vector<vector<char> > directMatrix; directMatrix.resize(numQueryFlows+1);
+
+ for(int i=0;i<=numQueryFlows;i++){
+ if (m->control_pressed) { return 0; }
+ scoreMatrix[i].resize(numRefFlows+1, 0.00);
+ directMatrix[i].resize(numRefFlows+1, 'x');
+
+ scoreMatrix[i][0] = i * gapOpening;
+ directMatrix[i][0] = 'u';
+ }
+
+ //cout << numQueryFlows << '\t' << numRefFlows << endl;
+
+
+ for(int i=0;i<=numRefFlows;i++){
+ scoreMatrix[0][i] = i * gapOpening;
+ directMatrix[0][i] = 'l';
+ }
+
+
+ for(int i=1;i<=numQueryFlows;i++){
+ for(int j=1;j<=numRefFlows;j++){
+ if (m->control_pressed) { return 0; }
+ double diagonal = 1000000000;
+ if(flowOrder[i%flowOrder.length()] == flowOrder[j%flowOrder.length()]){
+ diagonal = scoreMatrix[i-1][j-1] + penaltyMatrix[round(flowgram[i-1])][refFlow[j-1]];
+ }
+ double up = scoreMatrix[i-1][j] + gapOpening;
+ double left = scoreMatrix[i][j-1] + gapOpening;
+
+ double minScore = diagonal;
+ char direction = 'd';
+
+ if(left < diagonal && left < up){
+ minScore = left;
+ direction = 'l';
+ }
+ else if(up < diagonal && up < left){
+ minScore = up;
+ direction = 'u';
+ }
+
+ scoreMatrix[i][j] = minScore;
+ directMatrix[i][j] = direction;
+
+ }
+ }
+
+ int minRowIndex = numQueryFlows;
+ double minRowScore = scoreMatrix[numQueryFlows][numRefFlows];
+ for(int i=0;i<numQueryFlows;i++){
+ if (m->control_pressed) { return 0; }
+ if(scoreMatrix[i][numRefFlows] < minRowScore){
+ minRowScore = scoreMatrix[i][numRefFlows];
+ minRowIndex = i;
+ }
+ }
+
+ int minColumnIndex = numRefFlows;
+ double minColumnScore = scoreMatrix[numQueryFlows][numRefFlows];
+ for(int i=0;i<numRefFlows;i++){
+ if (m->control_pressed) { return 0; }
+ if(scoreMatrix[numQueryFlows][i] < minColumnScore){
+ minColumnScore = scoreMatrix[numQueryFlows][i];
+ minColumnIndex = i;
+ }
+ }
+
+
+ int i=minRowIndex;
+ int j= minColumnIndex;
+
+ vector<double> newFlowgram;
+ vector<double> newRefFlowgram;
+
+ while(i > 0 && j > 0){
+ if (m->control_pressed) { return 0; }
+ if(directMatrix[i][j] == 'd'){
+ newFlowgram.push_back(flowgram[i-1]);
+ newRefFlowgram.push_back(refFlow[j-1]);
+
+ i--;
+ j--;
+ }
+ else if(directMatrix[i][j] == 'l'){
+ newFlowgram.push_back(0);
+ newRefFlowgram.push_back(refFlow[j-1]);
+
+ j--;
+ }
+ else if(directMatrix[i][j] == 'u'){
+ newFlowgram.push_back(flowgram[i-1]);
+ newRefFlowgram.push_back(0);
+
+ i--;
+ }
+ }
+
+ flowgram = newFlowgram;
+ refFlow = newRefFlowgram;
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "alignFlowGrams");
+ exit(1);
+ }
+}
+
+//******************************************************************************************************************************
+
+int MakeLookupCommand::regress(vector<double>& data, int N){
+ try {
+ //fit data for larger values of N
+ double xMean = 0;
+ double yMean = 0;
+
+ for(int i=1;i<N;i++){
+ if (m->control_pressed) { return 0; }
+ xMean += i;
+ yMean += data[i];
+ }
+ xMean /= (N-1);
+ yMean /= (N-1);
+
+ double numerator = 0;
+ double denomenator = 0;
+ for(int i=1;i<N;i++){
+ if (m->control_pressed) { return 0; }
+ numerator += (i-xMean)*(data[i] - yMean);
+ denomenator += (i-xMean) * (i-xMean);
+ }
+ double slope = numerator / denomenator;
+ double intercept = yMean - slope * xMean;
+
+ for(int i=N;i<11;i++){
+ data[i] = intercept + i * slope;
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "MakeLookupCommand", "regress");
+ exit(1);
+ }
+}
+
+//******************************************************************************************************************************
+
+//**********************************************************************************************************************
+
+
--- /dev/null
+//
+// makelookupcommand.h
+// Mothur
+//
+// Created by SarahsWork on 5/14/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_makelookupcommand_h
+#define Mothur_makelookupcommand_h
+
+#include "command.hpp"
+#include "sequence.hpp"
+
+/**************************************************************************************************/
+
+class MakeLookupCommand : public Command {
+public:
+ MakeLookupCommand(string);
+ MakeLookupCommand();
+ ~MakeLookupCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "make.lookup"; }
+ string getCommandCategory() { return "Sequence Processing"; }
+
+ string getOutputPattern(string);
+ string getHelpString();
+ string getCitation() { return "http://www.mothur.org/wiki/Make.lookup"; }
+ string getDescription() { return "create custom lookup files for use with shhh.flows"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort;
+ string outputDir, flowFileName, errorFileName, flowOrder, refFastaFileName, barcodeSequence, keySequence;
+ vector<string> outputNames;
+ int thresholdCount;
+
+ vector<double> convertSeqToFlow(string sequence, string order);
+ int alignFlowGrams(vector<double>& flowgram, vector<double>& refFlow, double gapOpening, vector<vector<double> > penaltyMatrix, string flowOrder);
+ int regress(vector<double>& data, int N);
+};
+
+/**************************************************************************************************/
+
+
+
+
+#endif
/************************************************************/
int MothurOut::checkName(string& name) {
try {
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
- }
+ if (modifyNames) {
+ for (int i = 0; i < name.length(); i++) {
+ if (name[i] == ':') { name[i] = '_'; changedSeqNames = true; }
+ }
+ }
return 0;
}
catch(exception& e) {
exit(1);
}
}
+/**************************************************************************************************/
+double MothurOut::getAverage(vector<double> dists) {
+ try{
+ double average = 0;
+
+ for (int i = 0; i < dists.size(); i++) {
+ average += dists[i];
+ }
+
+ //finds average.
+ average /= (double) dists.size();
+
+ return average;
+ }
+ catch(exception& e) {
+ errorOut(e, "MothurOut", "getAverage");
+ exit(1);
+ }
+}
+
/**************************************************************************************************/
vector<double> MothurOut::getStandardDeviation(vector< vector<double> >& dists) {
try{
vector<string> binLabelsInFile;
vector<string> currentBinLabels;
string saveNextLabel, argv, sharedHeaderMode, groupMode;
- bool printedHeaders, commandInputsConvertError, changedSeqNames;
+ bool printedHeaders, commandInputsConvertError, changedSeqNames, modifyNames;
//functions from mothur.h
//file operations
vector<double> getStandardDeviation(vector< vector<double> >&);
vector<double> getStandardDeviation(vector< vector<double> >&, vector<double>&);
vector<double> getAverages(vector< vector<double> >&);
+ double getAverage(vector<double>);
vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&);
vector< vector<seqDist> > getStandardDeviation(vector< vector< vector<seqDist> > >&, vector< vector<seqDist> >&);
vector< vector<seqDist> > getAverages(vector< vector< vector<seqDist> > >&, string);
sharedHeaderMode = "";
groupMode = "group";
changedSeqNames = false;
+ modifyNames = true;
}
~MothurOut();
else if (name[0] != '@') { m->mothurOut("[ERROR]: reading " + name + " expected a name with @ as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else {
name = name.substr(1);
- for (int i = 0; i < name.length(); i++) { if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; } }
+ m->checkName(name);
}
//read sequence
else if (name2[0] != '+') { m->mothurOut("[ERROR]: reading " + name2 + " expected a name with + as a leading character."); m->mothurOutEndLine(); m->control_pressed = true; break; }
else {
name2 = name2.substr(1);
- for (int i = 0; i < name2.length(); i++) { if (name2[i] == ':') { name2[i] = '_'; m->changedSeqNames = true; } }
+ m->checkName(name2);
}
//read quality scores
namesOfGroupCombos.push_back(groups);
}
}
-
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
- }else{
- lines.clear();
- int numPairs = namesOfGroupCombos.size();
-
- int numPairsPerProcessor = numPairs / processors;
-
- for (int i = 0; i < processors; i++) {
- int startPos = i * numPairsPerProcessor;
-
- if(i == processors - 1){
- numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
- }
-
- lines.push_back(linePair(startPos, numPairsPerProcessor));
- }
-
- data = createProcesses(t, namesOfGroupCombos, ct);
- }
- #else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
- #endif
+
+ lines.clear();
+ int numPairs = namesOfGroupCombos.size();
+ int numPairsPerProcessor = numPairs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ int startPos = i * numPairsPerProcessor;
+ if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+ lines.push_back(linePair(startPos, numPairsPerProcessor));
+ }
+
+ data = createProcesses(t, namesOfGroupCombos, ct);
return data;
EstOutput Parsimony::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
+ int process = 1;
vector<int> processIDS;
EstOutput results;
-
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
in.close();
m->mothurRemove(s);
}
+#else
+ //fill in functions
+ vector<parsData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+ vector<CountTable*> cts;
+ vector<Tree*> trees;
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ CountTable* copyCount = new CountTable();
+ copyCount->copy(ct);
+ Tree* copyTree = new Tree(copyCount);
+ copyTree->getCopy(t);
+
+ cts.push_back(copyCount);
+ trees.push_back(copyTree);
+
+ parsData* temppars = new parsData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount);
+ pDataArray.push_back(temppars);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyParsimonyThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); }
+ delete cts[i];
+ delete trees[i];
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
- return results;
#endif
+ return results;
}
catch(exception& e) {
m->errorOut(e, "Parsimony", "createProcesses");
EstOutput driver(Tree*, vector< vector<string> >, int, int, CountTable*);
EstOutput createProcesses(Tree*, vector< vector<string> >, CountTable*);
};
-
/***********************************************************************/
+struct parsData {
+ int start;
+ int num;
+ MothurOut* m;
+ EstOutput results;
+ vector< vector<string> > namesOfGroupCombos;
+ Tree* t;
+ CountTable* ct;
+
+ parsData(){}
+ parsData(MothurOut* mout, int st, int en, vector< vector<string> > ngc, Tree* tree, CountTable* count) {
+ m = mout;
+ start = st;
+ num = en;
+ namesOfGroupCombos = ngc;
+ t = tree;
+ ct = count;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyParsimonyThreadFunction(LPVOID lpParam){
+ parsData* pDataArray;
+ pDataArray = (parsData*)lpParam;
+ try {
+
+ pDataArray->results.resize(pDataArray->num);
+
+ Tree* copyTree = new Tree(pDataArray->ct);
+ int count = 0;
+
+ for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+
+ if (pDataArray->m->control_pressed) { delete copyTree; return 0; }
+
+ int score = 0;
+
+ //groups in this combo
+ vector<string> groups = pDataArray->namesOfGroupCombos[h];
+
+ //copy users tree so that you can redo pgroups
+ copyTree->getCopy(pDataArray->t);
+
+ //create pgroups that reflect the groups the user want to use
+ for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
+ copyTree->tree[i].pGroups = (copyTree->mergeUserGroups(i, groups));
+ }
+
+ for(int i=copyTree->getNumLeaves();i<copyTree->getNumNodes();i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ int lc = copyTree->tree[i].getLChild();
+ int rc = copyTree->tree[i].getRChild();
+
+ int iSize = copyTree->tree[i].pGroups.size();
+ int rcSize = copyTree->tree[rc].pGroups.size();
+ int lcSize = copyTree->tree[lc].pGroups.size();
+
+ //if isize are 0 then that branch is to be ignored
+ if (iSize == 0) { }
+ else if ((rcSize == 0) || (lcSize == 0)) { }
+ //if you have more groups than either of your kids then theres been a change.
+ else if(iSize > rcSize || iSize > lcSize){
+ score++;
+ }
+ }
+
+ pDataArray->results[count] = score;
+ count++;
+ }
+
+ delete copyTree;
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "Parsimony", "MyParsimonyThreadFunction");
+ exit(1);
+ }
+}
+#endif
#endif
if (numSampledList.count(diversity[mGroups[j]].size()-1) == 0) { numSampledList.insert(diversity[mGroups[j]].size()-1); }
}
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors == 1){
- driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
- }else{
- if (rarefy) {
- vector<int> procIters;
-
- int numItersPerProcessor = iters / processors;
-
- //divide iters between processes
- for (int h = 0; h < processors; h++) {
- if(h == processors - 1){
- numItersPerProcessor = iters - h * numItersPerProcessor;
- }
- procIters.push_back(numItersPerProcessor);
- }
-
- createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
-
- }else{ //no need to paralellize if you dont want to rarefy
- driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
- }
- }
-
- #else
- driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
- #endif
-
+ if (rarefy) {
+ vector<int> procIters;
+ int numItersPerProcessor = iters / processors;
+
+ //divide iters between processes
+ for (int h = 0; h < processors; h++) {
+ if(h == processors - 1){ numItersPerProcessor = iters - h * numItersPerProcessor; }
+ procIters.push_back(numItersPerProcessor);
+ }
+
+ createProcesses(procIters, trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum);
+
+ }else{ //no need to paralellize if you dont want to rarefy
+ driver(trees[i], diversity, sumDiversity, iters, increment, randomLeaf, numSampledList, outCollect, outSum, true);
+ }
+
if (rarefy) { printData(numSampledList, sumDiversity, outRare, iters); }
}
//**********************************************************************************************************************
int PhyloDiversityCommand::createProcesses(vector<int>& procIters, Tree* t, map< string, vector<float> >& div, map<string, vector<float> >& sumDiv, int numIters, int increment, vector<int>& randomLeaf, set<int>& numSampledList, ofstream& outCollect, ofstream& outSum){
try {
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
+ int process = 1;
vector<int> processIDS;
map< string, vector<float> >::iterator itSum;
-
+
+ #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
in.close();
m->mothurRemove(inTemp);
}
+#else
+
+ //fill in functions
+ vector<phylodivData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+ vector<CountTable*> cts;
+ vector<Tree*> trees;
+ map<string, int> rootForGroup = getRootForGroups(t);
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ CountTable* copyCount = new CountTable();
+ copyCount->copy(ct);
+ Tree* copyTree = new Tree(copyCount);
+ copyTree->getCopy(t);
+
+ cts.push_back(copyCount);
+ trees.push_back(copyTree);
+
+ map<string, vector<float> > copydiv = div;
+ map<string, vector<float> > copysumDiv = sumDiv;
+ vector<int> copyrandomLeaf = randomLeaf;
+ set<int> copynumSampledList = numSampledList;
+ map<string, int> copyRootForGrouping = rootForGroup;
+
+ phylodivData* temp = new phylodivData(m, procIters[i], copydiv, copysumDiv, copyTree, copyCount, increment, copyrandomLeaf, copynumSampledList, copyRootForGrouping);
+ pDataArray.push_back(temp);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyPhyloDivThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ driver(t, div, sumDiv, procIters[0], increment, randomLeaf, numSampledList, outCollect, outSum, true);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (itSum = pDataArray[i]->sumDiv.begin(); itSum != pDataArray[i]->sumDiv.end(); itSum++) {
+ for (int k = 0; k < (itSum->second).size(); k++) {
+ sumDiv[itSum->first][k] += (itSum->second)[k];
+ }
+ }
+ delete cts[i];
+ delete trees[i];
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
#endif
else { score = div[mGroups[j]][numSampled] / (float)numIters; }
out << setprecision(4) << score << endl;
- cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
+ //cout << mGroups[j] << '\t' << numSampled << '\t'<< setprecision(4) << score << endl;
}
out.close();
};
+/***********************************************************************/
+struct phylodivData {
+ int numIters;
+ MothurOut* m;
+ map< string, vector<float> > div;
+ map<string, vector<float> > sumDiv;
+ map<string, int> rootForGroup;
+ vector<int> randomLeaf;
+ set<int> numSampledList;
+ int increment;
+ Tree* t;
+ CountTable* ct;
+ bool includeRoot;
+
+
+ phylodivData(){}
+ phylodivData(MothurOut* mout, int ni, map< string, vector<float> > cd, map< string, vector<float> > csd, Tree* tree, CountTable* count, int incre, vector<int> crl, set<int> nsl, map<string, int> rfg) {
+ m = mout;
+ t = tree;
+ ct = count;
+ div = cd;
+ numIters = ni;
+ sumDiv = csd;
+ increment = incre;
+ randomLeaf = crl;
+ numSampledList = nsl;
+ rootForGroup = rfg;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyPhyloDivThreadFunction(LPVOID lpParam){
+ phylodivData* pDataArray;
+ pDataArray = (phylodivData*)lpParam;
+ try {
+ int numLeafNodes = pDataArray->randomLeaf.size();
+ vector<string> mGroups = pDataArray->m->getGroups();
+
+ for (int l = 0; l < pDataArray->numIters; l++) {
+ random_shuffle(pDataArray->randomLeaf.begin(), pDataArray->randomLeaf.end());
+
+ //initialize counts
+ map<string, int> counts;
+ vector< map<string, bool> > countedBranch;
+ for (int i = 0; i < pDataArray->t->getNumNodes(); i++) {
+ map<string, bool> temp;
+ for (int j = 0; j < mGroups.size(); j++) { temp[mGroups[j]] = false; }
+ countedBranch.push_back(temp);
+ }
+
+ for (int j = 0; j < mGroups.size(); j++) { counts[mGroups[j]] = 0; }
+
+ for(int k = 0; k < numLeafNodes; k++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //calc branch length of randomLeaf k
+ //vector<float> br = calcBranchLength(t, randomLeaf[k], countedBranch, rootForGroup);
+ //(Tree* t, int leaf, vector< map<string, bool> >& counted, map<string, int> roots
+ /////////////////////////////////////////////////////////////////////////////////////
+ vector<float> br;
+ int index = pDataArray->randomLeaf[k];
+
+ vector<string> groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup();
+ br.resize(groups.size(), 0.0);
+
+ //you are a leaf
+ if(pDataArray->t->tree[index].getBranchLength() != -1){
+ for (int k = 0; k < groups.size(); k++) {
+ br[k] += abs(pDataArray->t->tree[index].getBranchLength());
+ }
+ }
+
+ index = pDataArray->t->tree[index].getParent();
+
+ //while you aren't at root
+ while(pDataArray->t->tree[index].getParent() != -1){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ for (int k = 0; k < groups.size(); k++) {
+
+ if (index >= pDataArray->rootForGroup[groups[k]]) { countedBranch[index][groups[k]] = true; } //if you are at this groups "root", then say we are done
+
+ if (!countedBranch[index][groups[k]]){ //if counted[index][groups[k] is true this groups has already added all br from here to root, so quit early
+ if (pDataArray->t->tree[index].getBranchLength() != -1) {
+ br[k] += abs(pDataArray->t->tree[index].getBranchLength());
+ }
+ countedBranch[index][groups[k]] = true;
+ }
+ }
+ index = pDataArray->t->tree[index].getParent();
+ }
+ /////////////////////////////////////////////////////////////////////////////////////
+
+ //for each group in the groups update the total branch length accounting for the names file
+ groups = pDataArray->t->tree[pDataArray->randomLeaf[k]].getGroup();
+
+ for (int j = 0; j < groups.size(); j++) {
+
+ if (pDataArray->m->inUsersGroups(groups[j], mGroups)) {
+ int numSeqsInGroupJ = 0;
+ map<string, int>::iterator it;
+ it = pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.find(groups[j]);
+ if (it != pDataArray->t->tree[pDataArray->randomLeaf[k]].pcount.end()) { //this leaf node contains seqs from group j
+ numSeqsInGroupJ = it->second;
+ }
+
+ if (numSeqsInGroupJ != 0) { pDataArray->div[groups[j]][(counts[groups[j]]+1)] = pDataArray->div[groups[j]][counts[groups[j]]] + br[j]; }
+
+ for (int s = (counts[groups[j]]+2); s <= (counts[groups[j]]+numSeqsInGroupJ); s++) {
+ pDataArray->div[groups[j]][s] = pDataArray->div[groups[j]][s-1]; //update counts, but don't add in redundant branch lengths
+ }
+ counts[groups[j]] += numSeqsInGroupJ;
+ }
+ }
+ }
+
+
+ //add this diversity to the sum
+ for (int j = 0; j < mGroups.size(); j++) {
+ for (int g = 0; g < pDataArray->div[mGroups[j]].size(); g++) {
+ pDataArray->sumDiv[mGroups[j]][g] += pDataArray->div[mGroups[j]][g];
+ }
+ }
+
+ }
+
+ return 0;
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "PhyloDiversityCommand", "MyPhyloDivThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
#endif
else if (c == 32 || c == 9){;} //space or tab
}
primers[oligo] = primerCount; primerCount++;
+ //cout << "for oligo = " << oligo << endl;
}else if(type == "REVERSE"){
string oligoRC = reverseOligo(oligo);
revPrimer.push_back(oligoRC);
- //cout << "oligo = " << oligo << " reverse = " << oligoRC << endl;
+ //cout << "rev oligo = " << oligo << " reverse = " << oligoRC << endl;
}else if(type == "BARCODE"){
- inOligos >> group;
+ inOligos >> group;
+ }else if(type == "PRIMER"){
+ m->gobble(inOligos);
+ primers[oligo] = primerCount; primerCount++;
+
+ string roligo="";
+ inOligos >> roligo;
+
+ for(int i=0;i<roligo.length();i++){
+ roligo[i] = toupper(roligo[i]);
+ if(roligo[i] == 'U') { roligo[i] = 'T'; }
+ }
+ revPrimer.push_back(reverseOligo(roligo));
+
+ // get rest of line in case there is a primer name
+ while (!inOligos.eof()) {
+ char c = inOligos.get();
+ if (c == 10 || c == 13 || c == -1){ break; }
+ else if (c == 32 || c == 9){;} //space or tab
+ }
+ //cout << "prim oligo = " << oligo << " reverse = " << roligo << endl;
}else if((type == "LINKER")||(type == "SPACER")) {;}
- else{ m->mothurOut(type + " is not recognized as a valid type. Choices are forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
+ else{ m->mothurOut(type + " is not recognized as a valid type. Choices are primer, forward, reverse, linker, spacer and barcode. Ignoring " + oligo + "."); m->mothurOutEndLine(); m->control_pressed = true; }
}
m->gobble(inOligos);
}
while (!in.eof()) {
in >> firstCol >> secondCol; m->gobble(in);
- for (int i = 0; i < firstCol.length(); i++) {
- if (firstCol[i] == ':') { firstCol[i] = '_'; m->changedSeqNames = true; }
- }
-
- int size = 1;
- for (int i = 0; i < secondCol.length(); i++) {
- if (secondCol[i] == ':') { secondCol[i] = '_'; m->changedSeqNames = true; }
- else if(secondCol[i] == ','){ size++; }
- }
+ m->checkName(firstCol);
+ m->checkName(secondCol);
+ int size = m->getNumNames(secondCol);
names[firstCol] = secondCol;
sizes[firstCol] = size;
name = name.substr(1);
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
}else{ m->mothurOut("Error in reading your qfile, at position " + toString(qFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
void QualityScores::setName(string name) {
try {
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
-
+ m->checkName(name);
seqName = name;
}
catch(exception& e) {
--- /dev/null
+#ifndef RANDOMNUMBER_H
+#define RANDOMNUMBER_H
+
+/*
+ * randomnumber.cpp
+ *
+ *
+ * Created by Pat Schloss on 7/6/11.
+ * Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+#include "randomnumber.h"
+#include <cmath>
+
+/**************************************************************************************************/
+
+RandomNumberGenerator::RandomNumberGenerator(){
+ srand( (unsigned)time( NULL ) );
+}
+
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomUniform(){
+
+ float randUnif = 0.0000;
+
+ while(randUnif == 0.0000){
+
+ randUnif = rand() / (float)RAND_MAX;
+
+ }
+
+ return randUnif;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomExp(){
+
+ float randExp = 0.0000;
+
+ while(randExp == 0.0000){
+
+ randExp = -log(randomUniform());
+
+ }
+
+ return randExp;
+}
+
+/**************************************************************************************************/
+//
+//Code shamelessly swiped and modified from Numerical Recipes in C++
+//
+/**************************************************************************************************/
+
+float RandomNumberGenerator::randomNorm(){
+
+ float x, y, rsquare;
+
+ do{
+ x = 2.0 * randomUniform() - 1.0;
+ y = 2.0 * randomUniform() - 1.0;
+
+ rsquare = x * x + y * y;
+ } while(rsquare >= 1.0 || rsquare == 0.0);
+
+ float fac = sqrt(-2.0 * log(rsquare)/rsquare);
+
+ return x * fac;
+}
+
+
+/**************************************************************************************************/
+/*
+ * Slightly modified version of:
+ *
+ * Mathlib : A C Library of Special Functions
+ * Copyright (C) 1998 Ross Ihaka
+ * Copyright (C) 2000-2005 The R Development Core Team
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, a copy is available at
+ * http://www.r-project.org/Licenses/
+ *
+ * SYNOPSIS
+ *
+ * #include <Rmath.h>
+ * float rgamma(float a, float scale);
+ *
+ * DESCRIPTION
+ *
+ * Random variates from the gamma distribution.
+ *
+ * REFERENCES
+ *
+ * [1] Shape parameter a >= 1. Algorithm GD in:
+ *
+ * Ahrens, J.H. and Dieter, U. (1982).
+ * Generating gamma variates by a modified
+ * rejection technique.
+ * Comm. ACM, 25, 47-54.
+ *
+ *
+ * [2] Shape parameter 0 < a < 1. Algorithm GS in:
+ *
+ * Ahrens, J.H. and Dieter, U. (1974).
+ * Computer methods for sampling from gamma, beta,
+ * poisson and binomial distributions.
+ * Computing, 12, 223-246.
+ *
+ * Input: a = parameter (mean) of the standard gamma distribution.
+ * Output: a variate from the gamma(a)-distribution
+ */
+
+float RandomNumberGenerator::randomGamma(float a)
+{
+ /* Constants : */
+ const static float sqrt32 = 5.656854;
+ const static float exp_m1 = 0.36787944117144232159;/* exp(-1) = 1/e */
+ float scale = 1.0;
+
+ /* Coefficients q[k] - for q0 = sum(q[k]*a^(-k))
+ * Coefficients a[k] - for q = q0+(t*t/2)*sum(a[k]*v^k)
+ * Coefficients e[k] - for exp(q)-1 = sum(e[k]*q^k)
+ */
+ const static float q1 = 0.04166669;
+ const static float q2 = 0.02083148;
+ const static float q3 = 0.00801191;
+ const static float q4 = 0.00144121;
+ const static float q5 = -7.388e-5;
+ const static float q6 = 2.4511e-4;
+ const static float q7 = 2.424e-4;
+
+ const static float a1 = 0.3333333;
+ const static float a2 = -0.250003;
+ const static float a3 = 0.2000062;
+ const static float a4 = -0.1662921;
+ const static float a5 = 0.1423657;
+ const static float a6 = -0.1367177;
+ const static float a7 = 0.1233795;
+
+ /* State variables [FIXME for threading!] :*/
+ static float aa = 0.;
+ static float aaa = 0.;
+ static float s, s2, d; /* no. 1 (step 1) */
+ static float q0, b, si, c;/* no. 2 (step 4) */
+
+ float e, p, q, r, t, u, v, w, x, ret_val;
+
+ if (a <= 0.0 || scale <= 0.0){ cout << "error alpha or scale parameter are less than zero." << endl; exit(1); }
+
+ if (a < 1.) { /* GS algorithm for parameters a < 1 */
+ e = 1.0 + exp_m1 * a;
+ for(;;) {
+ p = e * randomUniform();
+ if (p >= 1.0) {
+ x = -log((e - p) / a);
+ if (randomExp() >= (1.0 - a) * log(x))
+ break;
+ } else {
+ x = exp(log(p) / a);
+ if (randomExp() >= x)
+ break;
+ }
+ }
+ return scale * x;
+ }
+
+ /* --- a >= 1 : GD algorithm --- */
+
+ /* Step 1: Recalculations of s2, s, d if a has changed */
+ if (a != aa) {
+ aa = a;
+ s2 = a - 0.5;
+ s = sqrt(s2);
+ d = sqrt32 - s * 12.0;
+ }
+ /* Step 2: t = standard normal deviate,
+ x = (s,1/2) -normal deviate. */
+
+ /* immediate acceptance (i) */
+ t = randomNorm();
+ x = s + 0.5 * t;
+ ret_val = x * x;
+ if (t >= 0.0)
+ return scale * ret_val;
+
+ /* Step 3: u = 0,1 - uniform sample. squeeze acceptance (s) */
+ u = randomUniform();
+ if (d * u <= t * t * t)
+ return scale * ret_val;
+
+ /* Step 4: recalculations of q0, b, si, c if necessary */
+
+ if (a != aaa) {
+ aaa = a;
+ r = 1.0 / a;
+ q0 = ((((((q7 * r + q6) * r + q5) * r + q4) * r + q3) * r
+ + q2) * r + q1) * r;
+
+ /* Approximation depending on size of parameter a */
+ /* The constants in the expressions for b, si and c */
+ /* were established by numerical experiments */
+
+ if (a <= 3.686) {
+ b = 0.463 + s + 0.178 * s2;
+ si = 1.235;
+ c = 0.195 / s - 0.079 + 0.16 * s;
+ } else if (a <= 13.022) {
+ b = 1.654 + 0.0076 * s2;
+ si = 1.68 / s + 0.275;
+ c = 0.062 / s + 0.024;
+ } else {
+ b = 1.77;
+ si = 0.75;
+ c = 0.1515 / s;
+ }
+ }
+ /* Step 5: no quotient test if x not positive */
+
+ if (x > 0.0) {
+ /* Step 6: calculation of v and quotient q */
+ v = t / (s + s);
+ if (fabs(v) <= 0.25)
+ q = q0 + 0.5 * t * t * ((((((a7 * v + a6) * v + a5) * v + a4) * v
+ + a3) * v + a2) * v + a1) * v;
+ else
+ q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+
+
+ /* Step 7: quotient acceptance (q) */
+ if (log(1.0 - u) <= q)
+ return scale * ret_val;
+ }
+
+ for(;;) {
+ /* Step 8: e = standard exponential deviate
+ * u = 0,1 -uniform deviate
+ * t = (b,si)-float exponential (laplace) sample */
+ e = randomExp();
+ u = randomUniform();
+ u = u + u - 1.0;
+ if (u < 0.0)
+ t = b - si * e;
+ else
+ t = b + si * e;
+ /* Step 9: rejection if t < tau(1) = -0.71874483771719 */
+ if (t >= -0.71874483771719) {
+ /* Step 10: calculation of v and quotient q */
+ v = t / (s + s);
+ if (fabs(v) <= 0.25)
+ q = q0 + 0.5 * t * t *
+ ((((((a7 * v + a6) * v + a5) * v + a4) * v + a3) * v
+ + a2) * v + a1) * v;
+ else
+ q = q0 - s * t + 0.25 * t * t + (s2 + s2) * log(1.0 + v);
+ /* Step 11: hat acceptance (h) */
+ /* (if q not positive go to step 8) */
+ if (q > 0.0) {
+ w = expm1(q);
+ /* ^^^^^ original code had approximation with rel.err < 2e-7 */
+ /* if t is rejected sample again at step 8 */
+ if (c * fabs(u) <= w * exp(e - 0.5 * t * t))
+ break;
+ }
+ }
+ } /* for(;;) .. until `t' is accepted */
+ x = s + 0.5 * t;
+ return scale * x * x;
+}
+
+/**************************************************************************************************/
+//
+// essentially swiped from http://en.wikipedia.org/wiki/Dirichlet_distribution#Random_number_generation
+//
+/**************************************************************************************************/
+
+vector<float> RandomNumberGenerator::randomDirichlet(vector<float> alphas){
+
+ int nAlphas = (int)alphas.size();
+ vector<float> dirs(nAlphas, 0.0000);
+
+ float sum = 0.0000;
+ for(int i=0;i<nAlphas;i++){
+ dirs[i] = randomGamma(alphas[i]);
+ sum += dirs[i];
+ }
+
+ for(int i=0;i<nAlphas;i++){
+ dirs[i] /= sum;
+ }
+
+ return dirs;
+
+}
+
+/**************************************************************************************************/
+
+#endif
--- /dev/null
+#ifndef RANDOMNUMBER
+#define RANDOMNUMBER
+
+/*
+ * randomnumber.h
+ *
+ *
+ * Created by Pat Schloss on 7/6/11.
+ * Copyright 2011 Patrick D. Schloss. All rights reserved.
+ *
+ */
+
+
+/**************************************************************************************************/
+
+#include "mothur.h"
+
+/**************************************************************************************************/
+
+class RandomNumberGenerator {
+
+public:
+ RandomNumberGenerator();
+ float randomUniform();
+ float randomExp();
+ float randomNorm();
+ float randomGamma(float);
+ vector<float> randomDirichlet(vector<float> alphas);
+
+};
+
+/**************************************************************************************************/
+
+#endif
int newNSeqs = rank->getNumSeqs();
vector<double> data = estimate->getValues(rank);
- if(nIters != 1){
-
- double oldS = var[index] * ( nIters - 2 );
- double delta = data[0] - results[index];
- double newMean = results[index] + delta / nIters;
- double newS = oldS + delta * ( data[0] - newMean );
- double newVar = newS / ( nIters - 1 );
-
- seqs[index] = newNSeqs;
- results[index] = newMean;
- var[index] = newVar;
-
- index++;
- }
- else{
- seqs.push_back(newNSeqs);
- results.push_back(data[0]);
- var.push_back(0.0);
- }
+ map<int, vector<double> >::iterator it = results.find(newNSeqs);
+ if (it == results.end()) { //first iter for this count
+ vector<double> temp;
+ temp.push_back(data[0]);
+ results[newNSeqs] = temp;
+ }else {
+ it->second.push_back(data[0]);
+ }
}
catch(exception& e) {
m->errorOut(e, "RareDisplay", "update");
void RareDisplay::update(vector<SharedRAbundVector*> shared, int numSeqs, int numGroupComb) {
try {
vector<double> data = estimate->getValues(shared);
- double newNSeqs = data[0];
- if(nIters != 1){
-
- double oldS = var[index] * ( nIters - 2 );
- double delta = data[0] - results[index];
- double newMean = results[index] + delta / nIters;
- double newS = oldS + delta * ( data[0] - newMean );
- double newVar = newS / ( nIters - 1 );
- seqs[index] = newNSeqs;
- results[index] = newMean;
- var[index] = newVar;
-
- index++;
- }
- else{
-
- seqs.push_back(newNSeqs);
- results.push_back(data[0]);
- var.push_back(0.0);
-
- }
+ map<int, vector<double> >::iterator it = results.find(numSeqs);
+ if (it == results.end()) { //first iter for this count
+ vector<double> temp;
+ temp.push_back(data[0]);
+ results[numSeqs] = temp;
+ }else {
+ it->second.push_back(data[0]);
+ }
}
catch(exception& e) {
m->errorOut(e, "RareDisplay", "update");
void RareDisplay::reset(){
try {
nIters++;
- index = 0;
}
catch(exception& e) {
m->errorOut(e, "RareDisplay", "reset");
void RareDisplay::close(){
try {
-
output->initFile(label);
- for (int i = 0; i < seqs.size(); i++) {
+ for (map<int, vector<double> >::iterator it = results.begin(); it != results.end(); it++) {
vector<double> data(3,0);
- double variance = var[i];
-
- data[0] = results[i];
-
- double ci = 1.96 * pow(variance, 0.5);
- data[1] = data[0] - ci;
- data[2] = data[0] + ci;
+
+ sort((it->second).begin(), (it->second).end());
+
+ //lci=results[int(0.025*iter)] and hci=results[int(0.975*iter)]
+ data[0] = (it->second)[(int)(0.50*(nIters-1))];
+ //data[0] = m->getAverage(it->second);
+ data[1] = (it->second)[(int)(0.025*(nIters-1))];
+ data[2] = (it->second)[(int)(0.975*(nIters-1))];
+ //cout << nIters << '\t' << (int)(0.025*(nIters-1)) << '\t' << (int)(0.975*(nIters-1)) << endl;
+
+ //cout << it->first << '\t' << data[1] << '\t' << data[2] << endl;
- output->output(seqs[i], data);
+ output->output(it->first, data);
}
nIters = 1;
- index = 0;
-
- seqs.clear();
- results.clear();
- var.clear();
+ results.clear();
output->resetFile();
}
ifstream in;
m->openInputFile(filename, in);
- int thisIters;
- in >> thisIters; m->gobble(in);
+ int thisIters, size;
+ in >> thisIters >> size; m->gobble(in);
+ nIters += thisIters;
- for (int i = 0; i < seqs.size(); i++) {
- double tempresult, tempvar;
- in >> tempresult >> tempvar; m->gobble(in);
-
- //find weighted result
- results[i] = ((nIters * results[i]) + (thisIters * tempresult)) / (float)(nIters + thisIters);
-
- var[i] = ((nIters * var[i]) + (thisIters * tempvar)) / (float)(nIters + thisIters);
+ for (int i = 0; i < size; i++) {
+ int tempCount;
+ in >> tempCount; m->gobble(in);
+ map<int, vector<double> >::iterator it = results.find(tempCount);
+ if (it != results.end()) {
+ for (int j = 0; j < thisIters; j++) {
+ double value;
+ in >> value; m->gobble(in);
+ (it->second).push_back(value);
+ }
+ }else {
+ vector<double> tempValues;
+ for (int j = 0; j < thisIters; j++) {
+ double value;
+ in >> value; m->gobble(in);
+ tempValues.push_back(value);
+ }
+ results[tempCount] = tempValues;
+ }
}
in.close();
ofstream out;
m->openOutputFile(filename, out);
- out << nIters << endl;
+ out << nIters-1 << '\t' << results.size() << endl;
- for (int i = 0; i < seqs.size(); i++) {
- out << results[i] << '\t' << var[i] << endl;
+ for (map<int, vector<double> >::iterator it = results.begin(); it != results.end(); it++) {
+ out << it->first << '\t';
+ for(int i = 0; i < (it->second).size(); i++) {
+ out << (it->second)[i] << '\t';
+ }
+ out << endl;
}
out.close();
class RareDisplay : public Display {
public:
- RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1), index(0) {};
+ RareDisplay(Calculator* calc, FileOutput* file) : estimate(calc), output(file), nIters(1) {};
~RareDisplay() { delete estimate; delete output; };
void init(string);
void reset();
Calculator* estimate;
FileOutput* output;
string label;
- vector<int> seqs;
- vector<double> results;
- vector<double> var;
- int index, nIters;
+ map<int, vector<double> > results; //maps seqCount to results for that number of sequences
+ int nIters;
};
#endif
while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
- fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
+ fileHandle >> firstName; m->gobble(fileHandle);
+ fileHandle >> secondName; m->gobble(fileHandle);
+ fileHandle >> distance; // get the row and column names and distance
+
+ if (m->debug) { cout << firstName << '\t' << secondName << '\t' << distance << endl; }
if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
m->openInputFile(distFile, fileHandle); //let's start over
while(fileHandle){
- fileHandle >> firstName >> secondName >> distance;
+ fileHandle >> firstName; m->gobble(fileHandle);
+ fileHandle >> secondName; m->gobble(fileHandle);
+ fileHandle >> distance; // get the row and column names and distance
if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
while(fileHandle && lt == 1){ //let's assume it's a triangular matrix...
- fileHandle >> firstName >> secondName >> distance; // get the row and column names and distance
-
+ fileHandle >> firstName; m->gobble(fileHandle);
+ fileHandle >> secondName; m->gobble(fileHandle);
+ fileHandle >> distance; // get the row and column names and distance
+
if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
int itA = countTable->get(firstName);
m->openInputFile(distFile, fileHandle); //let's start over
while(fileHandle){
- fileHandle >> firstName >> secondName >> distance;
+ fileHandle >> firstName; m->gobble(fileHandle);
+ fileHandle >> secondName; m->gobble(fileHandle);
+ fileHandle >> distance; // get the row and column names and distance
if (m->control_pressed) { fileHandle.close(); delete reading; return 0; }
//make sure groups are valid
//takes care of user setting groupNames that are invalid or setting groups=all
- SharedUtil* util = new SharedUtil();
vector<string> namesGroups = groupMap->getNamesOfGroups();
- util->setGroups(Groups, namesGroups);
- delete util;
-
+ vector<string> checkedGroups;
+ for (int i = 0; i < Groups.size(); i++) {
+ if (m->inUsersGroups(Groups[i], namesGroups)) { checkedGroups.push_back(Groups[i]); }
+ else { m->mothurOut("[WARNING]: " + Groups[i] + " is not a valid group in your groupfile, ignoring.\n"); }
+ }
+
+ if (checkedGroups.size() == 0) { m->mothurOut("[ERROR]: no valid groups, aborting.\n"); delete groupMap; return 0; }
+ else {
+ Groups = checkedGroups;
+ m->setGroups(Groups);
+ }
+
//fill names with names of sequences that are from the groups we want to remove
fillNames();
exit(1);
}
}
+//***************************************************************************************************************
+bool SensSpecCommand::testFile(){
+ try{
+ ifstream fileHandle;
+ m->openInputFile(phylipfile, fileHandle);
+
+ bool square = false;
+ string numTest, name;
+ fileHandle >> numTest >> name;
+
+ if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
+
+ char d;
+ while((d=fileHandle.get()) != EOF){
+ if(isalnum(d)){
+ square = true;
+ break;
+ }
+ if(d == '\n'){
+ square = false;
+ break;
+ }
+ }
+ fileHandle.close();
+
+ return square;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SensSpecCommand", "testFile");
+ exit(1);
+ }
+}
//***************************************************************************************************************
int SensSpecCommand::processPhylip(){
try{
//probably need some checking to confirm that the names in the distance matrix are the same as those in the list file
- string origCutoff = "";
+ square = testFile();
+ string origCutoff = "";
bool getCutoff = 0;
if(cutoff == -1.00) { getCutoff = 1; }
- else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
+ else { origCutoff = toString(cutoff); cutoff += (0.49 / double(precision)); }
map<string, int> seqMap;
string seqList;
set<string> processedLabels;
set<string> userLabels = labels;
-
while((list != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
if(m->control_pressed){
else { trueNegatives++; }
}
}
+
+ if (square) { m->getline(phylipFile); } //get rest of line - redundant distances
+ m->gobble(phylipFile);
}
phylipFile.close();
m->openInputFile(distFile, phylipFile);
string numTest;
int pNumSeqs;
- phylipFile >> numTest;
+ phylipFile >> numTest; m->gobble(phylipFile);
if (!m->isContainingOnlyDigits(numTest)) { m->mothurOut("[ERROR]: expected a number and got " + numTest + ", quitting."); m->mothurOutEndLine(); exit(1); }
else {
m->mothurConvert(numTest, pNumSeqs);
}
- phylipFile >> pNumSeqs; m->gobble(phylipFile);
string seqName;
- double distance;
-
for(int i=0;i<pNumSeqs;i++){
-
if (m->control_pressed) { return ""; }
-
- phylipFile >> seqName;
+ phylipFile >> seqName; m->getline(phylipFile); m->gobble(phylipFile);
uniqueNames.insert(seqName);
-
- for(int j=0;j<i;j++){
- phylipFile >> distance;
- }
- m->gobble(phylipFile);
}
phylipFile.close();
}else {
m->splitAtComma(binnames, bnames);
string newNames = "";
- for (int i = 0; i < bnames.size(); i++) {
- string name = bnames[i];
+ for (int j = 0; j < bnames.size(); j++) {
+ string name = bnames[j];
//if that name is in the .accnos file, add it
if (uniqueNames.count(name) != 0) { newNames += name + ","; }
}
out.close();
if (wroteSomething) { return newListFile; }
+ else { m->mothurRemove(newListFile); }
+
return "";
}
catch(exception& e) {
set<string> labels; //holds labels to be used
long int truePositives, falsePositives, trueNegatives, falseNegatives;
- bool abort, allLines;
+ bool abort, allLines, square;
bool hard;
//string lineLabel;
double cutoff;
int process(map<string, int>&, string, bool&, string&);
int process(set<string>&, string, bool&, string&, int);
string preProcessList();
+ bool testFile();
};
string sname = ""; nameStream >> sname;
sname = sname.substr(1);
- for (int i = 0; i < sname.length(); i++) {
- if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(sname);
map<string, int>::iterator it = firstSeqNames.find(sname);
istringstream nameStream(input);
string sname = ""; nameStream >> sname;
- for (int i = 0; i < sname.length(); i++) {
- if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(sname);
map<string, int>::iterator it = firstSeqNamesReport.find(sname);
initialize();
name = newName;
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
//setUnaligned removes any gap characters for us
setUnaligned(sequence);
initialize();
name = newName;
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
//setUnaligned removes any gap characters for us
setUnaligned(sequence);
name = name.substr(1);
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
}else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
name = name.substr(1);
- for (int i = 0; i < name.length(); i++) {
- if (name[i] == ':') { name[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(name);
}else{ m->mothurOut("Error in reading your fastafile, at position " + toString(fastaFile.tellg()) + ". Blank name."); m->mothurOutEndLine(); m->control_pressed = true; }
try {
CommandParameter ptempdefault("tempdefault", "String", "", "", "", "", "","",false,false); parameters.push_back(ptempdefault);
CommandParameter pdebug("debug", "Boolean", "", "F", "", "", "","",false,false); parameters.push_back(pdebug);
+ CommandParameter pmodnames("modifynames", "Boolean", "", "T", "", "", "","",false,false); parameters.push_back(pmodnames);
CommandParameter pinput("input", "String", "", "", "", "", "","",false,false,true); parameters.push_back(pinput);
CommandParameter poutput("output", "String", "", "", "", "", "","",false,false,true); parameters.push_back(poutput);
CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
helpString += "The set.dir command can also be used to specify the directory where your input files are located, the directory must exist.\n";
helpString += "The set.dir command can also be used to override or set the default location mothur will look for files if it is unable to find them, the directory must exist.\n";
helpString += "The set.dir command can also be used to run mothur in debug mode.\n";
+ helpString += "The set.dir command can also be used to set the modifynames parameter. Default=t, meaning if your sequence names contain ':' change them to '_' to avoid issues while making trees. modifynames=F will leave sequence names as they are.\n";
helpString += "The set.dir command parameters are input, output, tempdefault and debug and one is required.\n";
helpString += "To run mothur in debug mode set debug=true. Default debug=false.\n";
helpString += "To return the output to the same directory as the input files you may enter: output=clear.\n";
else { debug = m->isTrue(temp); }
m->debug = debug;
+ bool nomod = false;
+ temp = validParameter.validFile(parameters, "modifynames", false);
+ if (temp == "not found") { modifyNames = true; nomod=true; }
+ else { modifyNames = m->isTrue(temp); }
+ m->modifyNames = modifyNames;
+
if (debug) { m->mothurOut("Setting [DEBUG] flag.\n"); }
+
- if ((input == "") && (output == "") && (tempdefault == "") && nodebug) {
- m->mothurOut("You must provide either an input, output, tempdefault or debug for the set.outdir command."); m->mothurOutEndLine(); abort = true;
+ if ((input == "") && (output == "") && (tempdefault == "") && nodebug && nomod) {
+ m->mothurOut("You must provide either an input, output, tempdefault, debug or modifynames for the set.outdir command."); m->mothurOutEndLine(); abort = true;
}else if((input == "") && (output == "") && (tempdefault == "")) { debugOnly = true; }
}
}
private:
CommandFactory* commandFactory;
string output, input, tempdefault;
- bool abort, debugOnly;
+ bool abort, debugOnly, modifyNames;
vector<string> outputNames;
CommandParameter ptdiffs("tdiffs", "Number", "", "0", "", "", "","",false,false); parameters.push_back(ptdiffs);
CommandParameter psignal("signal", "Number", "", "0.50", "", "", "","",false,false); parameters.push_back(psignal);
CommandParameter pnoise("noise", "Number", "", "0.70", "", "", "","",false,false); parameters.push_back(pnoise);
- CommandParameter porder("order", "String", "", "TACG", "", "", "","",false,false); parameters.push_back(porder);
-
+ CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder);
//shhh.flows
CommandParameter plookup("lookup", "InputTypes", "", "", "none", "none", "none","",false,false,true); parameters.push_back(plookup);
CommandParameter pcutoff("cutoff", "Number", "", "0.01", "", "", "","",false,false); parameters.push_back(pcutoff);
helpString += "The keepforward parameter allows you to indicate whether you want the forward primer removed or not. The default is F, meaning remove the forward primer.\n";
helpString += "The keepfirst parameter trims the sequence to the first keepfirst number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements. \n";
helpString += "The removelast removes the last removelast number of bases after the barcode or primers are removed, before the sequence is checked to see if it meets the other requirements.\n";
-
+ helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
helpString += "Example sff.multiple(file=mySffOligosFile.txt, trim=F).\n";
helpString += "Note: No spaces between parameter labels (i.e. file), '=' and parameters (i.e.mySffOligosFile.txt).\n";
return helpString;
m->setProcessors(temp);
m->mothurConvert(temp, processors);
- flowOrder = validParameter.validFile(parameters, "order", false);
- if (flowOrder == "not found"){ flowOrder = "TACG"; }
- else if(flowOrder.length() != 4){
- m->mothurOut("The value of the order option must be four bases long\n");
- }
+ temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ else {
+ if (toupper(temp[0]) == 'A') { flowOrder = "A"; }
+ else if(toupper(temp[0]) == 'B'){
+ flowOrder = "B"; }
+ else if(toupper(temp[0]) == 'I'){
+ flowOrder = "I"; }
+ else {
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
+ }
+ }
+
temp = validParameter.validFile(parameters, "cutoff", false); if (temp == "not found"){ temp = "0.01"; }
m->mothurConvert(temp, cutoff);
string fileroot = outputDir + m->getRootName(m->getSimpleName(filename));
map<string, string> variables;
variables["[filename]"] = fileroot;
- string fasta = fileroot + getOutputFileName("fasta",variables);
- string name = fileroot + getOutputFileName("name",variables);
- string group = fileroot + getOutputFileName("group",variables);
+ string fasta = getOutputFileName("fasta",variables);
+ string name = getOutputFileName("name",variables);
+ string group = getOutputFileName("group",variables);
if (m->control_pressed) { return 0; }
return data[index].abundance;
}
/***********************************************************************/
+//returns vector of abundances
+vector<float> SharedRAbundFloatVector::getAbundances(){
+ vector<float> abunds;
+ for (int i = 0; i < data.size(); i++) {
+ abunds.push_back(data[i].abundance);
+ }
+
+ return abunds;
+}
+/***********************************************************************/
individualFloat SharedRAbundFloatVector::get(int index){
return data[index];
}
individualFloat get(int);
vector <individual> getData();
float getAbundance(int);
+ vector<float> getAbundances();
void push_front(float, int, string); //abundance, otu, groupname
void insert(float, int, string); //abundance, otu, groupname
void push_back(float, string); //abundance, groupname
CommandParameter plarge("large", "Number", "", "-1", "", "", "","",false,false); parameters.push_back(plarge);
CommandParameter psigma("sigma", "Number", "", "60", "", "", "","",false,false); parameters.push_back(psigma);
CommandParameter pmindelta("mindelta", "Number", "", "0.000001", "", "", "","",false,false); parameters.push_back(pmindelta);
- CommandParameter porder("order", "Multiple", "A-B", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter porder("order", "Multiple", "A-B-I", "A", "", "", "","",false,false, true); parameters.push_back(porder); CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
vector<string> myArray;
helpString += "The flow parameter is used to input your flow file.\n";
helpString += "The file parameter is used to input the *flow.files file created by trim.flows.\n";
helpString += "The lookup parameter is used specify the lookup file you would like to use. http://www.mothur.org/wiki/Lookup_files.\n";
- helpString += "The order parameter options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n";
+ helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
return helpString;
}
catch(exception& e) {
m->mothurConvert(temp, sigma);
temp = validParameter.validFile(parameters, "order", false); if (temp == "not found"){ temp = "A"; }
- if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ if (temp.length() > 1) { m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
}
else {
if (toupper(temp[0]) == 'A') { flowOrder = "TACG"; }
- else if(toupper(temp[0]) == 'B'){
+ else if(toupper(temp[0]) == 'B'){
flowOrder = "TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC"; }
+ else if(toupper(temp[0]) == 'I'){
+ flowOrder = "TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC"; }
else {
- m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A or B. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC.\n"); abort=true;
+ m->mothurOut("[ERROR]: " + temp + " is not a valid option for order. order options are A, B, or I. A = TACG, B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC, and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n"); abort=true;
}
}
--- /dev/null
+//
+// sparcccommand.cpp
+// Mothur
+//
+// Created by SarahsWork on 5/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#include "sparcccommand.h"
+
+
+//**********************************************************************************************************************
+vector<string> SparccCommand::setParameters(){
+ try {
+ CommandParameter pshared("shared", "InputTypes", "", "", "none", "none", "none","outputType",false,true); parameters.push_back(pshared);
+ CommandParameter pgroups("groups", "String", "", "", "", "", "","",false,false); parameters.push_back(pgroups);
+ CommandParameter plabel("label", "String", "", "", "", "", "","",false,false); parameters.push_back(plabel);
+ CommandParameter psamplings("samplings", "Number", "", "20", "", "", "","",false,false,false); parameters.push_back(psamplings);
+ CommandParameter piterations("iterations", "Number", "", "10", "", "", "","",false,false,false); parameters.push_back(piterations);
+ CommandParameter ppermutations("permutations", "Number", "", "1000", "", "", "","",false,false,false); parameters.push_back(ppermutations);
+ CommandParameter pmethod("method", "Multiple", "relabund-dirichlet", "dirichlet", "", "", "","",false,false); parameters.push_back(pmethod);
+ CommandParameter pprocessors("processors", "Number", "", "1", "", "", "","",false,false,true); parameters.push_back(pprocessors);
+ //every command must have inputdir and outputdir. This allows mothur users to redirect input and output files.
+ CommandParameter pinputdir("inputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(pinputdir);
+ CommandParameter poutputdir("outputdir", "String", "", "", "", "", "","",false,false); parameters.push_back(poutputdir);
+
+ vector<string> myArray;
+ for (int i = 0; i < parameters.size(); i++) { myArray.push_back(parameters[i].name); }
+ return myArray;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "setParameters");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string SparccCommand::getHelpString(){
+ try {
+ string helpString = "";
+ helpString += "The sparcc command allows you to ....\n";
+ helpString += "The sparcc command parameters are: shared, groups, label, samplings, iterations, permutations, processors and method.\n";
+ helpString += "The samplings parameter is used to .... Default=20.\n";
+ helpString += "The iterations parameter is used to ....Default=10.\n";
+ helpString += "The permutations parameter is used to ....Default=1000.\n";
+ helpString += "The method parameter is used to ....Options are relabund and dirichlet. Default=dirichlet.\n";
+ helpString += "The default value for groups is all the groups in your sharedfile.\n";
+ helpString += "The label parameter is used to analyze specific labels in your shared file.\n";
+ helpString += "The sparcc command should be in the following format: sparcc(shared=yourSharedFile)\n";
+ helpString += "sparcc(shared=final.an.shared)\n";
+ return helpString;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "getHelpString");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+string SparccCommand::getOutputPattern(string type) {
+ try {
+ string pattern = "";
+
+ if (type == "corr") { pattern = "[filename],[distance],sparcc_correlation"; }
+ else if (type == "pvalue") { pattern = "[filename],[distance],sparcc_pvalue"; }
+ else if (type == "sparccrelabund") { pattern = "[filename],[distance],sparcc_relabund"; }
+ else { m->mothurOut("[ERROR]: No definition for type " + type + " output pattern.\n"); m->control_pressed = true; }
+
+ return pattern;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "getOutputPattern");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(){
+ try {
+ abort = true; calledHelp = true;
+ setParameters();
+ vector<string> tempOutNames;
+ outputTypes["corr"] = tempOutNames;
+ outputTypes["pvalue"] = tempOutNames;
+ outputTypes["sparccrelabund"] = tempOutNames;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "SparccCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+SparccCommand::SparccCommand(string option) {
+ try {
+ abort = false; calledHelp = false;
+ allLines = 1;
+
+ //allow user to run help
+ if(option == "help") { help(); abort = true; calledHelp = true; }
+ else if(option == "citation") { citation(); abort = true; calledHelp = true;}
+
+ else {
+ //valid paramters for this command
+ vector<string> myArray = setParameters();
+
+ OptionParser parser(option);
+ map<string,string> parameters = parser.getParameters();
+
+ ValidParameters validParameter;
+ map<string,string>::iterator it;
+ //check to make sure all parameters are valid for command
+ for (it = parameters.begin(); it != parameters.end(); it++) {
+ if (validParameter.isValidParameter(it->first, myArray, it->second) != true) { abort = true; }
+ }
+
+ vector<string> tempOutNames;
+ outputTypes["corr"] = tempOutNames; //filetypes should be things like: shared, fasta, accnos...
+ outputTypes["pvalue"] = tempOutNames;
+ outputTypes["sparccrelabund"] = tempOutNames;
+
+ //if the user changes the input directory command factory will send this info to us in the output parameter
+ string inputDir = validParameter.validFile(parameters, "inputdir", false);
+ if (inputDir == "not found"){ inputDir = ""; }
+ else {
+
+ string path;
+ it = parameters.find("shared");
+ //user has given a template file
+ if(it != parameters.end()){
+ path = m->hasPath(it->second);
+ //if the user has not given a path then, add inputdir. else leave path alone.
+ if (path == "") { parameters["shared"] = inputDir + it->second; }
+ }
+ }
+
+ //check for parameters
+ //get shared file, it is required
+ sharedfile = validParameter.validFile(parameters, "shared", true);
+ if (sharedfile == "not open") { sharedfile = ""; abort = true; }
+ else if (sharedfile == "not found") {
+ //if there is a current shared file, use it
+ sharedfile = m->getSharedFile();
+ if (sharedfile != "") { m->mothurOut("Using " + sharedfile + " as input file for the shared parameter."); m->mothurOutEndLine(); }
+ else { m->mothurOut("You have no current sharedfile and the shared parameter is required."); m->mothurOutEndLine(); abort = true; }
+ }else { m->setSharedFile(sharedfile); }
+
+ //if the user changes the output directory command factory will send this info to us in the output parameter
+ outputDir = validParameter.validFile(parameters, "outputdir", false); if (outputDir == "not found"){
+ outputDir = m->hasPath(sharedfile); //if user entered a file with a path then preserve it
+ }
+
+ normalizeMethod = validParameter.validFile(parameters, "method", false);
+ if (normalizeMethod == "not found") { normalizeMethod = "dirichlet"; }
+ if ((normalizeMethod == "dirichlet") || (normalizeMethod == "relabund")) { }
+ else { m->mothurOut(normalizeMethod + " is not a valid method. Valid methods are dirichlet and relabund."); m->mothurOutEndLine(); abort = true; }
+
+
+ string temp = validParameter.validFile(parameters, "samplings", false); if (temp == "not found"){ temp = "20"; }
+ m->mothurConvert(temp, numSamplings);
+
+ if(normalizeMethod == "relabund"){ numSamplings = 1; }
+
+ temp = validParameter.validFile(parameters, "iterations", false); if (temp == "not found"){ temp = "10"; }
+ m->mothurConvert(temp, maxIterations);
+
+ temp = validParameter.validFile(parameters, "permutations", false); if (temp == "not found"){ temp = "1000"; }
+ m->mothurConvert(temp, numPermutations);
+
+ temp = validParameter.validFile(parameters, "processors", false); if (temp == "not found"){ temp = m->getProcessors(); }
+ m->setProcessors(temp);
+ m->mothurConvert(temp, processors);
+
+ string groups = validParameter.validFile(parameters, "groups", false);
+ if (groups == "not found") { groups = ""; }
+ else { m->splitAtDash(groups, Groups); }
+ m->setGroups(Groups);
+
+ string label = validParameter.validFile(parameters, "label", false);
+ if (label == "not found") { label = ""; }
+ else {
+ if(label != "all") { m->splitAtDash(label, labels); allLines = 0; }
+ else { allLines = 1; }
+ }
+ }
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "SparccCommand");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SparccCommand::execute(){
+ try {
+
+ if (abort == true) { if (calledHelp) { return 0; } return 2; }
+
+ int start = time(NULL);
+
+ InputData input(sharedfile, "sharedfile");
+ vector<SharedRAbundVector*> lookup = input.getSharedRAbundVectors();
+ string lastLabel = lookup[0]->getLabel();
+
+ //if the users enters label "0.06" and there is no "0.06" in their file use the next lowest label.
+ set<string> processedLabels;
+ set<string> userLabels = labels;
+
+ //as long as you are not at the end of the file or done wih the lines you want
+ while((lookup[0] != NULL) && ((allLines == 1) || (userLabels.size() != 0))) {
+
+ if (m->control_pressed) { for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; } for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); }return 0; }
+
+ if(allLines == 1 || labels.count(lookup[0]->getLabel()) == 1){
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+ }
+
+ if ((m->anyLabelsToProcess(lookup[0]->getLabel(), userLabels, "") == true) && (processedLabels.count(lastLabel) != 1)) {
+ string saveLabel = lookup[0]->getLabel();
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ processedLabels.insert(lookup[0]->getLabel());
+ userLabels.erase(lookup[0]->getLabel());
+
+ //restore real lastlabel to save below
+ lookup[0]->setLabel(saveLabel);
+ }
+
+ lastLabel = lookup[0]->getLabel();
+ //prevent memory leak
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; lookup[i] = NULL; }
+
+ if (m->control_pressed) { return 0; }
+
+ //get next line to process
+ lookup = input.getSharedRAbundVectors();
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ //output error messages about any remaining user labels
+ set<string>::iterator it;
+ bool needToRun = false;
+ for (it = userLabels.begin(); it != userLabels.end(); it++) {
+ m->mothurOut("Your file does not include the label " + *it);
+ if (processedLabels.count(lastLabel) != 1) {
+ m->mothurOut(". I will use " + lastLabel + "."); m->mothurOutEndLine();
+ needToRun = true;
+ }else {
+ m->mothurOut(". Please refer to " + lastLabel + "."); m->mothurOutEndLine();
+ }
+ }
+
+ //run last label if you need to
+ if (needToRun == true) {
+ for (int i = 0; i < lookup.size(); i++) { if (lookup[i] != NULL) { delete lookup[i]; } }
+ lookup = input.getSharedRAbundVectors(lastLabel);
+
+ m->mothurOut(lookup[0]->getLabel()); m->mothurOutEndLine();
+
+ process(lookup);
+
+ for (int i = 0; i < lookup.size(); i++) { delete lookup[i]; }
+ }
+
+ if (m->control_pressed) { for (int i = 0; i < outputNames.size(); i++) { m->mothurRemove(outputNames[i]); } return 0; }
+
+ m->mothurOut("It took " + toString(time(NULL) - start) + " seconds to process.");
+ m->mothurOutEndLine();
+ m->mothurOutEndLine();
+
+ //output files created by command
+ m->mothurOutEndLine();
+ m->mothurOut("Output File Names: "); m->mothurOutEndLine();
+ for (int i = 0; i < outputNames.size(); i++) { m->mothurOut(outputNames[i]); m->mothurOutEndLine(); }
+ m->mothurOutEndLine();
+ return 0;
+
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "execute");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+vector<vector<float> > SparccCommand::shuffleSharedVector(vector<vector<float> >& sharedVector){
+ try {
+ int numGroups = (int)sharedVector.size();
+ int numOTUs = (int)sharedVector[0].size();
+
+ vector<vector<float> > shuffledVector = sharedVector;
+
+ for(int i=0;i<numGroups;i++){
+ for(int j=0;j<numOTUs;j++){
+ shuffledVector[i][j] = sharedVector[rand()%numGroups][j];
+ }
+ }
+
+ return shuffledVector;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "execute");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+int SparccCommand::process(vector<SharedRAbundVector*>& lookup){
+ try {
+ cout.setf(ios::fixed, ios::floatfield);
+ cout.setf(ios::showpoint);
+
+ vector<vector<float> > sharedVector;
+ vector<string> otuNames = m->currentBinLabels;
+
+ //fill sharedVector to pass to CalcSparcc
+ for (int i = 0; i < lookup.size(); i++) {
+ vector<int> abunds = lookup[i]->getAbundances();
+ vector<float> temp;
+ for (int j = 0; j < abunds.size(); j++) { temp.push_back((float) abunds[j]); }
+ sharedVector.push_back(temp);
+ }
+ int numOTUs = (int)sharedVector[0].size();
+ int numGroups = lookup.size();
+
+ map<string, string> variables;
+ variables["[filename]"] = outputDir + m->getRootName(m->getSimpleName(sharedfile));
+ variables["[distance]"] = lookup[0]->getLabel();
+
+
+ string relAbundFileName = getOutputFileName("sparccrelabund", variables);
+ ofstream relAbundFile;
+ m->openOutputFile(relAbundFileName, relAbundFile);
+ outputNames.push_back(relAbundFileName); outputTypes["sparccrelabund"].push_back(relAbundFileName);
+
+ relAbundFile << "OTU\taveRelAbund\n";
+ for(int i=0;i<numOTUs;i++){
+ if (m->control_pressed) { relAbundFile.close(); return 0; }
+
+ double relAbund = 0.0000;
+ for(int j=0;j<numGroups;j++){
+ relAbund += sharedVector[j][i]/(double)lookup[j]->getNumSeqs();
+ }
+ relAbundFile << otuNames[i] <<'\t' << relAbund / (double) numGroups << endl;
+ }
+ relAbundFile.close();
+
+ CalcSparcc originalData(sharedVector, maxIterations, numSamplings, normalizeMethod);
+ vector<vector<float> > origCorrMatrix = originalData.getRho();
+
+ string correlationFileName = getOutputFileName("corr", variables);
+ ofstream correlationFile;
+ m->openOutputFile(correlationFileName, correlationFile);
+ outputNames.push_back(correlationFileName); outputTypes["corr"].push_back(correlationFileName);
+ correlationFile.setf(ios::fixed, ios::floatfield);
+ correlationFile.setf(ios::showpoint);
+
+ for(int i=0;i<numOTUs;i++){ correlationFile << '\t' << otuNames[i]; } correlationFile << endl;
+ for(int i=0;i<numOTUs;i++){
+ correlationFile << otuNames[i];
+ for(int j=0;j<numOTUs;j++){
+ correlationFile << '\t' << origCorrMatrix[i][j];
+ }
+ correlationFile << endl;
+ }
+
+
+ if(numPermutations != 0){
+ vector<vector<float> > pValues = createProcesses(sharedVector, origCorrMatrix);
+
+ if (m->control_pressed) { return 0; }
+
+ string pValueFileName = getOutputFileName("pvalue", variables);
+ ofstream pValueFile;
+ m->openOutputFile(pValueFileName, pValueFile);
+ outputNames.push_back(pValueFileName); outputTypes["pvalue"].push_back(pValueFileName);
+ pValueFile.setf(ios::fixed, ios::floatfield);
+ pValueFile.setf(ios::showpoint);
+
+ for(int i=0;i<numOTUs;i++){ pValueFile << '\t' << otuNames[i]; } pValueFile << endl;
+ for(int i=0;i<numOTUs;i++){
+ pValueFile << otuNames[i];
+ for(int j=0;j<numOTUs;j++){
+ pValueFile << '\t' << pValues[i][j];
+ }
+ pValueFile << endl;
+ }
+ }
+
+
+ return 0;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "process");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::createProcesses(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix){
+ try {
+ int numOTUs = sharedVector[0].size();
+ vector<vector<float> > pValues;
+
+ if(processors == 1){
+ pValues = driver(sharedVector, origCorrMatrix, numPermutations);
+ }else{
+ //divide iters between processors
+ vector<int> procIters;
+ int numItersPerProcessor = numPermutations / processors;
+
+ //divide iters between processes
+ for (int h = 0; h < processors; h++) {
+ if(h == processors - 1){ numItersPerProcessor = numPermutations - h * numItersPerProcessor; }
+ procIters.push_back(numItersPerProcessor);
+ }
+
+ vector<int> processIDS;
+ int process = 1;
+
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
+ //loop through and create all the processes you want
+ while (process != processors) {
+ int pid = fork();
+
+ if (pid > 0) {
+ processIDS.push_back(pid); //create map from line number to pid so you can append files in correct order later
+ process++;
+ }else if (pid == 0){
+ pValues = driver(sharedVector, origCorrMatrix, procIters[process]);
+
+ //pass pvalues to parent
+ ofstream out;
+ string tempFile = toString(getpid()) + ".pvalues.temp";
+ m->openOutputFile(tempFile, out);
+
+ //pass values
+ for (int i = 0; i < pValues.size(); i++) {
+ for (int j = 0; j < pValues[i].size(); j++) {
+ out << pValues[i][j] << '\t';
+ }
+ out << endl;
+ }
+ out << endl;
+
+ out.close();
+
+ exit(0);
+ }else {
+ m->mothurOut("[ERROR]: unable to spawn the necessary processes."); m->mothurOutEndLine();
+ for (int i = 0; i < processIDS.size(); i++) { kill (processIDS[i], SIGINT); }
+ exit(0);
+ }
+ }
+
+ //do my part
+ pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+
+ //force parent to wait until all the processes are done
+ for (int i=0;i<processIDS.size();i++) {
+ int temp = processIDS[i];
+ wait(&temp);
+ }
+
+ //combine results
+ for (int i = 0; i < processIDS.size(); i++) {
+ ifstream in;
+ string tempFile = toString(processIDS[i]) + ".pvalues.temp";
+ m->openInputFile(tempFile, in);
+
+ ////// to do ///////////
+ int numTemp; numTemp = 0;
+
+ for (int j = 0; j < pValues.size(); j++) {
+ for (int k = 0; k < pValues.size(); k++) {
+ in >> numTemp; m->gobble(in);
+ pValues[j][k] += numTemp;
+ }
+ m->gobble(in);
+ }
+ in.close(); m->mothurRemove(tempFile);
+ }
+#else
+
+ //fill in functions
+ vector<sparccData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+
+ //make copy so we don't get access violations
+ vector< vector<float> > copySharedVector = sharedVector;
+ vector< vector<float> > copyOrig = origCorrMatrix;
+
+ sparccData* temp = new sparccData(m, procIters[i], copySharedVector, copyOrig, numSamplings, maxIterations, numPermutations, normalizeMethod);
+ pDataArray.push_back(temp);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MySparccThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ //do my part
+ pValues = driver(sharedVector, origCorrMatrix, procIters[0]);
+
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->pValues.size(); j++) {
+ for (int k = 0; k < pDataArray[i]->pValues[j].size(); k++) {
+ pValues[j][k] += pDataArray[i]->pValues[j][k];
+ }
+ }
+
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+#endif
+ }
+
+ for(int i=0;i<numOTUs;i++){
+ pValues[i][i] = 1;
+ for(int j=0;j<i;j++){
+ pValues[i][j] /= (double)numPermutations;
+ pValues[j][i] = pValues[i][j];
+ }
+ }
+
+ return pValues;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "createProcesses");
+ exit(1);
+ }
+}
+
+//**********************************************************************************************************************
+vector<vector<float> > SparccCommand::driver(vector<vector<float> >& sharedVector, vector<vector<float> >& origCorrMatrix, int numPerms){
+ try {
+ int numOTUs = sharedVector[0].size();
+ vector<vector<float> > sharedShuffled = sharedVector;
+ vector<vector<float> > pValues(numOTUs);
+ for(int i=0;i<numOTUs;i++){ pValues[i].assign(numOTUs, 0); }
+
+ for(int i=0;i<numPerms;i++){
+ if (m->control_pressed) { return pValues; }
+ sharedShuffled = shuffleSharedVector(sharedVector);
+ CalcSparcc permutedData(sharedShuffled, maxIterations, numSamplings, normalizeMethod);
+ vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<j;k++){
+ double randValue = permuteCorrMatrix[j][k];
+ double observedValue = origCorrMatrix[j][k];
+ if(observedValue >= 0 && randValue > observedValue) { pValues[j][k]++; }//this method seems to deflate the
+ else if(observedValue < 0 && randValue < observedValue){ pValues[j][k]++; }//pvalues of small rho values
+ }
+ }
+ if((i+1) % (int)(numPermutations * 0.05) == 0){ cout << i+1 << endl; }
+ }
+
+ return pValues;
+ }
+ catch(exception& e) {
+ m->errorOut(e, "SparccCommand", "driver");
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
--- /dev/null
+//
+// sparcccommand.h
+// Mothur
+//
+// Created by SarahsWork on 5/10/13.
+// Copyright (c) 2013 Schloss Lab. All rights reserved.
+//
+
+#ifndef Mothur_sparcccommand_h
+#define Mothur_sparcccommand_h
+
+#include "command.hpp"
+#include "inputdata.h"
+#include "calcsparcc.h"
+
+/**************************************************************************************************/
+
+class SparccCommand : public Command {
+public:
+ SparccCommand(string);
+ SparccCommand();
+ ~SparccCommand(){}
+
+ vector<string> setParameters();
+ string getCommandName() { return "sparcc"; }
+ string getCommandCategory() { return "OTU-Based Approaches"; }
+
+ string getOutputPattern(string);
+ //commmand category choices: Sequence Processing, OTU-Based Approaches, Hypothesis Testing, Phylotype Analysis, General, Clustering and Hidden
+ string getHelpString();
+ string getCitation() { return "Friedman J, Alm EJ (2012) Inferring Correlation Networks from Genomic Survey Data. PLoS Comput Biol 8(9): e1002687. doi:10.1371/journal.pcbi.1002687 http://www.mothur.org/wiki/Sparcc"; }
+ string getDescription() { return "brief description"; }
+
+ int execute();
+ void help() { m->mothurOut(getHelpString()); }
+
+private:
+ bool abort, allLines;
+ string outputDir, sharedfile, normalizeMethod;
+ int numSamplings, maxIterations, numPermutations, processors;
+ set<string> labels;
+ vector<string> Groups;
+ vector<string> outputNames;
+
+ int process(vector<SharedRAbundVector*>&);
+ vector<vector<float> > createProcesses(vector<vector<float> >&, vector<vector<float> >&);
+ vector<vector<float> > driver(vector<vector<float> >&, vector<vector<float> >&, int);
+ vector<vector<float> > shuffleSharedVector(vector<vector<float> >&);
+};
+
+/**************************************************************************************************/
+
+struct sparccData {
+ MothurOut* m;
+ int numPerms;
+ vector< vector<float> > sharedVector;
+ vector< vector<float> > origCorrMatrix;
+ vector<vector<float> > pValues;
+ int numSamplings, maxIterations, numPermutations;
+ string normalizeMethod;
+
+ sparccData(){}
+ sparccData(MothurOut* mout, int it, vector< vector<float> > cs, vector< vector<float> > co, int ns, int mi, int np, string nm) {
+ m = mout;
+ numPerms = it;
+ sharedVector = cs;
+ origCorrMatrix = co;
+ numSamplings = ns;
+ maxIterations = mi;
+ numPermutations = np;
+ normalizeMethod = nm;
+ }
+};
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MySparccThreadFunction(LPVOID lpParam){
+ sparccData* pDataArray;
+ pDataArray = (sparccData*)lpParam;
+
+ try {
+
+ int numOTUs = pDataArray->sharedVector[0].size();
+ vector<vector<float> > sharedShuffled = pDataArray->sharedVector;
+ pDataArray->pValues.resize(numOTUs);
+ for(int i=0;i<numOTUs;i++){ pDataArray->pValues[i].assign(numOTUs, 0); }
+
+ for(int i=0;i<pDataArray->numPerms;i++){
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //sharedShuffled = shuffleSharedVector(sharedVector);
+ //////////////////////////////////////////////////////////
+ int numGroups = (int)pDataArray->sharedVector.size();
+ sharedShuffled = pDataArray->sharedVector;
+
+ for(int k=0;k<numGroups;k++){
+ for(int j=0;j<numOTUs;j++){
+ sharedShuffled[k][j] = pDataArray->sharedVector[rand()%numGroups][j];
+ }
+ }
+ /////////////////////////////////////////////////////////
+
+ CalcSparcc permutedData(sharedShuffled, pDataArray->maxIterations, pDataArray->numSamplings, pDataArray->normalizeMethod);
+ vector<vector<float> > permuteCorrMatrix = permutedData.getRho();
+
+ for(int j=0;j<numOTUs;j++){
+ for(int k=0;k<j;k++){
+ double randValue = permuteCorrMatrix[j][k];
+ double observedValue = pDataArray->origCorrMatrix[j][k];
+ if(observedValue >= 0 && randValue > observedValue) { pDataArray->pValues[j][k]++; }//this method seems to deflate the
+ else if(observedValue < 0 && randValue < observedValue){ pDataArray->pValues[j][k]++; }//pvalues of small rho values
+ }
+ }
+ if((i+1) % (int)(pDataArray->numPermutations * 0.05) == 0){ cout << i+1 << endl; }
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "SparccCommand", "MySparccThreadFunction");
+ exit(1);
+ }
+}
+#endif
+
+
+/**************************************************************************************************/
+
+
+
+
+#endif
}
copyGroups.clear();
-
+
//process each distance file
for (int i = 0; i < numGroups; i++) {
else { m->mothurRemove((countfile + "." + toString(i) + ".temp")); }
}
+ //restore old fasta file name since dist.seqs overwrites it with the temp files
+ m->setFastaFile(fastafile);
+
vector<string> tempDistFiles;
for(int i=0;i<numGroups;i++){
if (outputDir == "") { outputDir = m->hasPath(fastafile); }
int numGroups = 0;
- ofstream outFile;
+ //ofstream outFile;
ifstream dFile;
m->openInputFile(distFile, dFile);
//have we reached the max buffer size
if (numOutputs[groupID] > 60) { //write out sequence
+ ofstream outFile;
outFile.open(fileName.c_str(), ios::app);
outFile << outputs[groupID] << seqA << '\t' << seqB << '\t' << dist << endl;
outFile.close();
//if groupB is written to file it is above buffer size so read and write to new merged file
if (wroteOutPut[groupIDB]) {
string fileName2 = distFile + "." + toString(groupIDB) + ".temp";
- ifstream fileB(fileName2.c_str(), ios::ate);
+ /*ifstream fileB(fileName2.c_str(), ios::ate);
outFile.open(fileName.c_str(), ios::app);
outFile << temp.substr(0, lastRead);
delete memblock;
- fileB.close();
+ fileB.close();*/
+ m->appendFiles(fileName2, fileName);
m->mothurRemove(fileName2);
+
//write out the merged memory
if (numOutputs[groupID] > 60) {
- outFile << outputs[groupID];
+ ofstream tempOut;
+ m->openOutputFile(fileName, tempOut);
+ tempOut << outputs[groupID];
outputs[groupID] = "";
numOutputs[groupID] = 0;
+ tempOut.close();
}
- outFile.close();
+ //outFile.close();
wroteOutPut[groupID] = true;
wroteOutPut[groupIDB] = false;
if (wroteOutPut[groupIDA]) {
string fileName2 = distFile + "." + toString(groupIDA) + ".temp";
- ifstream fileB(fileName2.c_str(), ios::ate);
+ /*ifstream fileB(fileName2.c_str(), ios::ate);
outFile.open(fileName.c_str(), ios::app);
delete memblock;
- fileB.close();
+ fileB.close();*/
+ m->appendFiles(fileName2, fileName);
m->mothurRemove(fileName2);
//write out the merged memory
if (numOutputs[groupID] > 60) {
- outFile << outputs[groupID];
+ ofstream tempOut;
+ m->openOutputFile(fileName, tempOut);
+ tempOut << outputs[groupID];
outputs[groupID] = "";
numOutputs[groupID] = 0;
+ tempOut.close();
}
- outFile.close();
+ //outFile.close();
wroteOutPut[groupID] = true;
wroteOutPut[groupIDA] = false;
//remove old names files just in case
if (numOutputs[i] > 0) {
+ ofstream outFile;
outFile.open(fileName.c_str(), ios::app);
outFile << outputs[i];
outFile.close();
vector<summarySharedData*> pDataArray;
DWORD dwThreadIdArray[processors-1];
- HANDLE hThreadArray[processors-1];
+ HANDLE hThreadArray[processors-1];
//Create processor worker threads.
for( int i=1; i<processors; i++ ){
temp->setGroup(thisLookup[k]->getGroup());
newLookup.push_back(temp);
}
+
//for each bin
for (int k = 0; k < thisLookup[0]->getNumBins(); k++) {
if (lookup.size() < 2) { m->mothurOut("You have not provided enough valid groups. I cannot run the command."); m->mothurOutEndLine(); m->control_pressed = true; return 0; }
}
-
numGroups = lookup.size();
+
+ //sanity check to make sure processors < numComparisions
+ int numDists = 0;
+ for(int i=0;i<numGroups;i++){
+ for(int j=0;j<i;j++){
+ numDists++;
+ if (numDists > processors) { break; }
+ }
+ }
+ if (numDists < processors) { processors = numDists; }
+
lines.resize(processors);
for (int i = 0; i < processors; i++) {
lines[i].start = int (sqrt(float(i)/float(processors)) * numGroups);
thisItersLookup.clear();
for (int i = 0; i < calcDists.size(); i++) { calcDists[i].clear(); }
}
+
+ if (m->debug) { m->mothurOut("[DEBUG]: iter = " + toString(thisIter) + ".\n"); }
}
if (m->debug) { m->mothurOut("[DEBUG]: done with iters.\n"); }
try {
string helpString = "";
helpString += "The trim.flows command reads a flowgram file and creates .....\n";
+ helpString += "The order parameter options are A, B or I. Default=A. A = TACG and B = TACGTACGTACGATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATAGATCGCATGACGATCGCATATCGTCAGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGTAGTCGAGCATCATCTGACGCAGTACGTGCATGATCTCAGTCAGCAGCTATGTCAGTGCATGCATAGATCGCATGACGATCGCATATCGTCAGTGCAGTGACTGATCGTCATCAGCTAGCATCGACTGCATGATCTCAGTCAGCAGC and I = TACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGCTACGTACGTCTGAGCATCGATCGATGTACAGC.\n";
helpString += "Note: No spaces between parameter labels (i.e. fasta), '=' and parameters (i.e.yourFasta).\n";
helpString += "For more details please check out the wiki http://www.mothur.org/wiki/Trim.flows.\n";
return helpString;
string sname = ""; nameStream >> sname;
sname = sname.substr(1);
- for (int i = 0; i < sname.length(); i++) {
- if (sname[i] == ':') { sname[i] = '_'; m->changedSeqNames = true; }
- }
+ m->checkName(sname);
map<string, int>::iterator it = firstSeqNames.find(sname);
namesOfGroupCombos.push_back(groups);
}
}
+
+ lines.clear();
+ int numPairs = namesOfGroupCombos.size();
+ int numPairsPerProcessor = numPairs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ int startPos = i * numPairsPerProcessor;
+ if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+ lines.push_back(linePair(startPos, numPairsPerProcessor));
+ }
+
+ data = createProcesses(t, namesOfGroupCombos, ct);
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
- }else{
- int numPairs = namesOfGroupCombos.size();
-
- int numPairsPerProcessor = numPairs / processors;
-
- for (int i = 0; i < processors; i++) {
- int startPos = i * numPairsPerProcessor;
-
- if(i == processors - 1){
- numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
- }
-
- lines.push_back(linePair(startPos, numPairsPerProcessor));
- }
- data = createProcesses(t, namesOfGroupCombos, ct);
- lines.clear();
- }
- #else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), ct);
- #endif
-
+ lines.clear();
+
return data;
}
catch(exception& e) {
EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, CountTable* ct) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
+ int process = 1;
vector<int> processIDS;
EstOutput results;
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
in.close();
m->mothurRemove(s);
}
+#else
+ //fill in functions
+ vector<unweightedData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+ vector<CountTable*> cts;
+ vector<Tree*> trees;
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ CountTable* copyCount = new CountTable();
+ copyCount->copy(ct);
+ Tree* copyTree = new Tree(copyCount);
+ copyTree->getCopy(t);
+
+ cts.push_back(copyCount);
+ trees.push_back(copyTree);
+
+ unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
+ pDataArray.push_back(tempweighted);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, ct);
- //m->mothurOut("DONE."); m->mothurOutEndLine(); m->mothurOutEndLine();
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
- return results;
-#endif
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); }
+ delete cts[i];
+ delete trees[i];
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ }
+
+#endif
+ return results;
}
catch(exception& e) {
m->errorOut(e, "Unweighted", "createProcesses");
int count = 0;
int total = num;
- int twentyPercent = (total * 0.20);
- if (twentyPercent == 0) { twentyPercent = 1; }
-
-
+
for (int h = start; h < (start+num); h++) {
if (m->control_pressed) { return results; }
}
count++;
- //report progress
- //if((count % twentyPercent) == 0) { float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine(); }
- }
-
- //report progress
- //if((count % twentyPercent) != 0) { float tempOut = (count / (float)total); if (isnan(tempOut) || isinf(tempOut)) { tempOut = 0.0; } m->mothurOut("Percentage complete: " + toString((int(tempOut) * 100.0))); m->mothurOutEndLine(); }
+ }
return results;
}
namesOfGroupCombos.push_back(groups);
}
}
+
+ lines.clear();
+ int numPairs = namesOfGroupCombos.size();
+ int numPairsPerProcessor = numPairs / processors;
+
+ for (int i = 0; i < processors; i++) {
+ int startPos = i * numPairsPerProcessor;
+ if(i == processors - 1){ numPairsPerProcessor = numPairs - i * numPairsPerProcessor; }
+ lines.push_back(linePair(startPos, numPairsPerProcessor));
+ }
+
+ data = createProcesses(t, namesOfGroupCombos, true, ct);
+ lines.clear();
- #if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- if(processors == 1){
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
- }else{
- int numPairs = namesOfGroupCombos.size();
-
- int numPairsPerProcessor = numPairs / processors;
-
- for (int i = 0; i < processors; i++) {
- int startPos = i * numPairsPerProcessor;
- if(i == processors - 1){
- numPairsPerProcessor = numPairs - i * numPairsPerProcessor;
- }
- lines.push_back(linePair(startPos, numPairsPerProcessor));
- }
-
- data = createProcesses(t, namesOfGroupCombos, true, ct);
-
- lines.clear();
- }
- #else
- data = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), true, ct);
- #endif
-
return data;
}
catch(exception& e) {
EstOutput Unweighted::createProcesses(Tree* t, vector< vector<string> > namesOfGroupCombos, bool usingGroups, CountTable* ct) {
try {
-#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
- int process = 1;
+ int process = 1;
vector<int> processIDS;
EstOutput results;
-
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+
//loop through and create all the processes you want
while (process != processors) {
int pid = fork();
in.close();
m->mothurRemove(s);
}
+#else
+ //for some reason it doesn't seem to be calculating hte random trees scores. all scores are the same even though copytree appears to be randomized.
+
+ /*
+ //fill in functions
+ vector<unweightedData*> pDataArray;
+ DWORD dwThreadIdArray[processors-1];
+ HANDLE hThreadArray[processors-1];
+ vector<CountTable*> cts;
+ vector<Tree*> trees;
+
+ //Create processor worker threads.
+ for( int i=1; i<processors; i++ ){
+ CountTable* copyCount = new CountTable();
+ copyCount->copy(ct);
+ Tree* copyTree = new Tree(copyCount);
+ copyTree->getCopy(t);
+
+ cts.push_back(copyCount);
+ trees.push_back(copyTree);
+
+ unweightedData* tempweighted = new unweightedData(m, lines[i].start, lines[i].num, namesOfGroupCombos, copyTree, copyCount, includeRoot);
+ pDataArray.push_back(tempweighted);
+ processIDS.push_back(i);
+
+ hThreadArray[i-1] = CreateThread(NULL, 0, MyUnWeightedRandomThreadFunction, pDataArray[i-1], 0, &dwThreadIdArray[i-1]);
+ }
+
+ results = driver(t, namesOfGroupCombos, lines[0].start, lines[0].num, usingGroups, ct);
- return results;
-#endif
+ //Wait until all threads have terminated.
+ WaitForMultipleObjects(processors-1, hThreadArray, TRUE, INFINITE);
+
+ //Close all thread handles and free memory allocations.
+ for(int i=0; i < pDataArray.size(); i++){
+ for (int j = 0; j < pDataArray[i]->results.size(); j++) { results.push_back(pDataArray[i]->results[j]); }
+ delete cts[i];
+ delete trees[i];
+ CloseHandle(hThreadArray[i]);
+ delete pDataArray[i];
+ } */
+
+ results = driver(t, namesOfGroupCombos, 0, namesOfGroupCombos.size(), usingGroups, ct);
+#endif
+ return results;
}
catch(exception& e) {
m->errorOut(e, "Unweighted", "createProcesses");
if (isnan(UW) || isinf(UW)) { UW = 0; }
results[count] = UW;
- }
+ }
count++;
}
};
/***********************************************************************/
+struct unweightedData {
+ int start;
+ int num;
+ MothurOut* m;
+ EstOutput results;
+ vector< vector<string> > namesOfGroupCombos;
+ Tree* t;
+ CountTable* ct;
+ bool includeRoot;
+
+ unweightedData(){}
+ unweightedData(MothurOut* mout, int st, int en, vector< vector<string> > ngc, Tree* tree, CountTable* count, bool ir) {
+ m = mout;
+ start = st;
+ num = en;
+ namesOfGroupCombos = ngc;
+ t = tree;
+ ct = count;
+ includeRoot = ir;
+ }
+};
+
+/**************************************************************************************************/
+#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux) || (__linux__) || (__unix__) || (__unix)
+#else
+static DWORD WINAPI MyUnWeightedThreadFunction(LPVOID lpParam){
+ unweightedData* pDataArray;
+ pDataArray = (unweightedData*)lpParam;
+ try {
+ pDataArray->results.resize(pDataArray->num);
+ map< vector<string>, set<int> > rootForGrouping;
+
+ int count = 0;
+
+ for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group
+ double totalBL = 0.00; //all branch lengths
+ double UW = 0.00; //Unweighted Value = UniqueBL / totalBL;
+
+ //find a node that belongs to one of the groups in this combo
+ int nodeBelonging = -1;
+ for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
+ if (pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = pDataArray->t->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
+ }
+
+ //sanity check
+ if (nodeBelonging == -1) {
+ pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
+ for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
+ pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
+ pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
+ }else{
+
+ //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
+ //getRoot(t, nodeBelonging, namesOfGroupCombos[h]);
+ /////////////////////////////////////////////////////////////////////////////
+ //you are a leaf so get your parent
+ vector<string> grouping = pDataArray->namesOfGroupCombos[h];
+ int index = pDataArray->t->tree[nodeBelonging].getParent();
+
+ if (pDataArray->includeRoot) {
+ rootForGrouping[grouping].clear();
+ }else {
+
+ //my parent is a potential root
+ rootForGrouping[grouping].insert(index);
+
+ //while you aren't at root
+ while(pDataArray->t->tree[index].getParent() != -1){
+ //cout << index << endl;
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //am I the root for this grouping? if so I want to stop "early"
+ //does my sibling have descendants from the users groups?
+ //if so I am not the root
+ int parent = pDataArray->t->tree[index].getParent();
+ int lc = pDataArray->t->tree[parent].getLChild();
+ int rc = pDataArray->t->tree[parent].getRChild();
+
+ int sib = lc;
+ if (lc == index) { sib = rc; }
+
+ map<string, int>::iterator itGroup;
+ int pcountSize = 0;
+ for (int j = 0; j < grouping.size(); j++) {
+ map<string, int>::iterator itGroup = pDataArray->t->tree[sib].pcount.find(grouping[j]);
+ if (itGroup != pDataArray->t->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+ }
+
+ //if yes, I am not the root
+ if (pcountSize != 0) {
+ rootForGrouping[grouping].clear();
+ rootForGrouping[grouping].insert(parent);
+ }
+
+ index = parent;
+ }
+
+ //get all nodes above the root to add so we don't add their u values above
+ index = *(rootForGrouping[grouping].begin());
+ while(pDataArray->t->tree[index].getParent() != -1){
+ int parent = pDataArray->t->tree[index].getParent();
+ rootForGrouping[grouping].insert(parent);
+ //cout << parent << " in root" << endl;
+ index = parent;
+ }
+ }
+ /////////////////////////////////////////////////////////////////////////////
+
+ for(int i=0;i<pDataArray->t->getNumNodes();i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+ //cout << i << endl;
+ //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+ //pcountSize = 2, not unique to one group
+ //pcountSize = 1, unique to one group
+
+ int pcountSize = 0;
+ for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
+ map<string, int>::iterator itGroup = pDataArray->t->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
+ if (itGroup != pDataArray->t->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+ }
+
+
+ //unique calc
+ if (pcountSize == 0) { }
+ else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
+ UniqueBL += abs(pDataArray->t->tree[i].getBranchLength());
+ }
+
+ //total calc
+ if (pcountSize == 0) { }
+ else if ((pDataArray->t->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
+ totalBL += abs(pDataArray->t->tree[i].getBranchLength());
+ }
+ }
+ //cout << UniqueBL << '\t' << totalBL << endl;
+ UW = (UniqueBL / totalBL);
+
+ if (isnan(UW) || isinf(UW)) { UW = 0; }
+
+ pDataArray->results[count] = UW;
+ }
+ count++;
+ }
+
+ return 0;
+
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedThreadFunction");
+ exit(1);
+ }
+}
+/**************************************************************************************************/
+
+static DWORD WINAPI MyUnWeightedRandomThreadFunction(LPVOID lpParam){
+ unweightedData* pDataArray;
+ pDataArray = (unweightedData*)lpParam;
+ try {
+ pDataArray->results.resize(pDataArray->num);
+
+ int count = 0;
+
+ Tree* copyTree = new Tree(pDataArray->ct);
+
+ for (int h = pDataArray->start; h < (pDataArray->start+pDataArray->num); h++) {
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ map< vector<string>, set<int> > rootForGrouping;
+
+ //copy random tree passed in
+ copyTree->getCopy(pDataArray->t);
+
+ //swap labels in the groups you want to compare
+ copyTree->assembleRandomUnifracTree(pDataArray->namesOfGroupCombos[h]);
+
+ double UniqueBL=0.0000; //a branch length is unique if it's chidren are from the same group
+ double totalBL = 0.00; //all branch lengths
+ double UW = 0.00; //Unweighted Value = UniqueBL / totalBL;
+ //find a node that belongs to one of the groups in this combo
+ int nodeBelonging = -1;
+ for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size(); g++) {
+ if (copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]].size() != 0) { nodeBelonging = copyTree->groupNodeInfo[pDataArray->namesOfGroupCombos[h][g]][0]; break; }
+ }
+
+ //sanity check
+ if (nodeBelonging == -1) {
+ pDataArray->m->mothurOut("[WARNING]: cannot find a nodes in the tree from grouping ");
+ for (int g = 0; g < pDataArray->namesOfGroupCombos[h].size()-1; g++) { pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][g] + "-"); }
+ pDataArray->m->mothurOut(pDataArray->namesOfGroupCombos[h][pDataArray->namesOfGroupCombos[h].size()-1]);
+ pDataArray->m->mothurOut(", skipping."); pDataArray->m->mothurOutEndLine(); pDataArray->results[count] = UW;
+ }else{
+
+ //if including the root this clears rootForGrouping[namesOfGroupCombos[h]]
+ //getRoot(copyTree, nodeBelonging, namesOfGroupCombos[h]);
+ /////////////////////////////////////////////////////////////////////////////
+ //you are a leaf so get your parent
+ vector<string> grouping = pDataArray->namesOfGroupCombos[h];
+ int index = copyTree->tree[nodeBelonging].getParent();
+
+ if (pDataArray->includeRoot) {
+ rootForGrouping[grouping].clear();
+ }else {
+
+ //my parent is a potential root
+ rootForGrouping[grouping].insert(index);
+
+ //while you aren't at root
+ while(copyTree->tree[index].getParent() != -1){
+ //cout << index << endl;
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //am I the root for this grouping? if so I want to stop "early"
+ //does my sibling have descendants from the users groups?
+ //if so I am not the root
+ int parent = copyTree->tree[index].getParent();
+ int lc = copyTree->tree[parent].getLChild();
+ int rc = copyTree->tree[parent].getRChild();
+
+ int sib = lc;
+ if (lc == index) { sib = rc; }
+
+ map<string, int>::iterator itGroup;
+ int pcountSize = 0;
+ for (int j = 0; j < grouping.size(); j++) {
+ map<string, int>::iterator itGroup = copyTree->tree[sib].pcount.find(grouping[j]);
+ if (itGroup != copyTree->tree[sib].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+ }
+
+ //if yes, I am not the root
+ if (pcountSize != 0) {
+ rootForGrouping[grouping].clear();
+ rootForGrouping[grouping].insert(parent);
+ }
+
+ index = parent;
+ }
+
+ //get all nodes above the root to add so we don't add their u values above
+ index = *(rootForGrouping[grouping].begin());
+ while(copyTree->tree[index].getParent() != -1){
+ int parent = copyTree->tree[index].getParent();
+ rootForGrouping[grouping].insert(parent);
+ //cout << parent << " in root" << endl;
+ index = parent;
+ }
+ }
+ /////////////////////////////////////////////////////////////////////////////
+ for(int i=0;i<copyTree->getNumNodes();i++){
+
+ if (pDataArray->m->control_pressed) { return 0; }
+
+ //pcountSize = 0, they are from a branch that is entirely from a group the user doesn't want
+ //pcountSize = 2, not unique to one group
+ //pcountSize = 1, unique to one group
+ int pcountSize = 0;
+ for (int j = 0; j < pDataArray->namesOfGroupCombos[h].size(); j++) {
+ map<string, int>::iterator itGroup = copyTree->tree[i].pcount.find(pDataArray->namesOfGroupCombos[h][j]);
+ if (itGroup != copyTree->tree[i].pcount.end()) { pcountSize++; if (pcountSize > 1) { break; } }
+ }
+
+ //unique calc
+ if (pcountSize == 0) { }
+ else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize == 1) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a unique branch length and you are not the root
+ UniqueBL += abs(copyTree->tree[i].getBranchLength());
+ }
+
+ //total calc
+ if (pcountSize == 0) { }
+ else if ((copyTree->tree[i].getBranchLength() != -1) && (pcountSize != 0) && (rootForGrouping[pDataArray->namesOfGroupCombos[h]].count(i) == 0)) { //you have a branch length and you are not the root
+ totalBL += abs(copyTree->tree[i].getBranchLength());
+ }
+
+ }
+ cout << h << '\t' << UniqueBL << '\t' << totalBL << endl;
+ UW = (UniqueBL / totalBL);
+
+ if (isnan(UW) || isinf(UW)) { UW = 0; }
+
+ pDataArray->results[count] = UW;
+ cout << h << '\t' << UW << endl;
+ }
+ count++;
+
+ }
+
+ delete copyTree;
+
+ return 0;
+ }
+ catch(exception& e) {
+ pDataArray->m->errorOut(e, "UnWeighted", "MyUnWeightedRandomThreadFunction");
+ exit(1);
+ }
+}
+
+#endif
+
#endif