374610830F40652400460C57 /* unweighted.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374610820F40652400460C57 /* unweighted.cpp */; };
3746109D0F40657600460C57 /* unifracunweightedcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3746109C0F40657600460C57 /* unifracunweightedcommand.cpp */; };
374CD63F0F65832000D90B4A /* libshuffcommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374CD63E0F65832000D90B4A /* libshuffcommand.cpp */; };
+ 374CD6F10F65A4C100D90B4A /* coverage.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 374CD6F00F65A4C100D90B4A /* coverage.cpp */; };
3782163D0F616079008E1F6D /* fullmatrix.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3782163C0F616079008E1F6D /* fullmatrix.cpp */; };
379293C30F2DE73400B9034A /* treemap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 379293C20F2DE73400B9034A /* treemap.cpp */; };
379294700F2E191800B9034A /* parsimonycommand.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3792946F0F2E191800B9034A /* parsimonycommand.cpp */; };
3746109C0F40657600460C57 /* unifracunweightedcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = unifracunweightedcommand.cpp; sourceTree = "<group>"; };
374CD63D0F65832000D90B4A /* libshuffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = libshuffcommand.h; sourceTree = "<group>"; };
374CD63E0F65832000D90B4A /* libshuffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = libshuffcommand.cpp; sourceTree = "<group>"; };
+ 374CD6EF0F65A4C100D90B4A /* coverage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = coverage.h; sourceTree = "<group>"; };
+ 374CD6F00F65A4C100D90B4A /* coverage.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = coverage.cpp; sourceTree = "<group>"; };
3782163B0F616079008E1F6D /* fullmatrix.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fullmatrix.h; sourceTree = "<group>"; };
3782163C0F616079008E1F6D /* fullmatrix.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fullmatrix.cpp; sourceTree = "<group>"; };
379293C10F2DE73400B9034A /* treemap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = treemap.h; sourceTree = "<group>"; };
37D927BB0F21331F001D4494 /* bootstrap.cpp */,
37D927C00F21331F001D4494 /* chao1.h */,
37D927BF0F21331F001D4494 /* chao1.cpp */,
+ 374CD6EF0F65A4C100D90B4A /* coverage.h */,
+ 374CD6F00F65A4C100D90B4A /* coverage.cpp */,
37D927E80F21331F001D4494 /* jackknife.h */,
37D927E70F21331F001D4494 /* jackknife.cpp */,
37D927F50F21331F001D4494 /* npshannon.h */,
A70B53AC0F4CD7AD0064797E /* getlinecommand.cpp in Sources */,
3782163D0F616079008E1F6D /* fullmatrix.cpp in Sources */,
374CD63F0F65832000D90B4A /* libshuffcommand.cpp in Sources */,
+ 374CD6F10F65A4C100D90B4A /* coverage.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
--- /dev/null
+/*
+ * coverage.cpp
+ * Mothur
+ *
+ * Created by Sarah Westcott on 3/9/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "coverage.h"
+
+//**********************************************************************************************************************
+vector< vector<float> > Coverage::getValues(FullMatrix*, float) {
+ try {
+ globaldata = GlobalData::getInstance();
+ numGroups = globaldata->Groups.size();
+
+ //initialize data
+ data.resize(numGroups);
+ for (int l = 0; l < data.size(); l++) {
+ data[l].push_back(0.0);
+ }
+
+ /**************************************/
+ //get the minumums for each comparision
+ /**************************************/
+ return data;
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+
+
\ No newline at end of file
--- /dev/null
+#ifndef COVERAGE_H
+#define COVERAGE_H
+
+/*
+ * coverage.h
+ * Mothur
+ *
+ * Created by Sarah Westcott on 3/9/09.
+ * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
+ *
+ */
+
+#include "mothur.h"
+#include "fullmatrix.h"
+#include "globaldata.hpp"
+
+using namespace std;
+
+/***********************************************************************/
+
+class Coverage {
+
+ public:
+ Coverage(){};
+ ~Coverage(){};
+ vector< vector<float> > getValues(FullMatrix*, float);
+
+ private:
+ GlobalData* globaldata;
+ vector< vector<float> > data;
+ int numGroups;
+
+};
+
+
+/***********************************************************************/
+
+
+
+#endif
\ No newline at end of file
globaldata = GlobalData::getInstance();
groupmap = globaldata->gGroupmap;
- float minSoFar = 0.0;
string name, group;
filehandle >> numSeqs >> name;
square = true;
filehandle.putback(d);
- if (numSeqs >= 2) {
- //save first distance that is not distance to itself as minimum
- filehandle >> matrix[0][0] >> minSoFar;
- matrix[0][1] = minSoFar;
- }
-
- for(int i=2;i<numSeqs;i++){
+ for(int i=0;i<numSeqs;i++){
filehandle >> matrix[0][i];
- if (matrix[0][i] < minSoFar) { minSoFar = matrix[0][i]; }
}
- index[0].minDist = minSoFar;
break;
}
reading = new Progress("Reading matrix: ", numSeqs * numSeqs);
int count = 0;
- float distance, minSoFar;
- minSoFar = 0.0;
+ float distance;
+
string group, name;
for(int i=1;i<numSeqs;i++){
index[i].groupname = group;
index[i].seqName = name;
- filehandle >> minSoFar;
- matrix[i][0] = minSoFar;
-
if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
- for(int j=1;j<numSeqs;j++){
+ for(int j=0;j<numSeqs;j++){
filehandle >> distance;
-
- if ((distance < minSoFar) && (i != j)) { minSoFar = distance; }
matrix[i][j] = distance;
count++;
reading->update(count);
}
-
- //save minimum value for each row
- index[i].minDist = minSoFar;
}
reading->finish();
delete reading;
reading = new Progress("Reading matrix: ", numSeqs * (numSeqs - 1) / 2);
int count = 0;
- float distance, minSoFar;
- minSoFar = 0.0;
+ float distance;
string group, name;
group = groupmap->getGroup(name);
index[i].groupname = group;
index[i].seqName = name;
-
- filehandle >> minSoFar;
- matrix[i][0] = minSoFar;
if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
- for(int j=1;j<i;j++){
+ for(int j=0;j<i;j++){
filehandle >> distance;
-
- if (distance < minSoFar) { minSoFar = distance; }
matrix[i][j] = distance; matrix[j][i] = distance;
count++;
reading->update(count);
}
- //save minimum value for each row
- index[i].minDist = minSoFar;
}
reading->finish();
delete reading;
}
}
+
/**************************************************************************/
+void FullMatrix::getMinsForRowsVectors(){
+ try{
+ numGroups = globaldata->gGroupmap->namesOfGroups.size();
+ numUserGroups = globaldata->Groups.size();
+
+ //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
+ sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
+
+ /*************************************************/
+ //find where in matrix each group starts and stops
+ /*************************************************/
+ vector<int> bounds; //bounds[0] = 0, bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find C because its numSeqs.
+ bounds.resize(numGroups);
+
+ bounds[numGroups] = numSeqs;
+ //for each group find bounds of subgroup/comparison
+ for (int i = 0; i < numGroups; i++) {
+ getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i]);
+ }
+
+ /************************************************************/
+ //fill the minsForRows vectors for each group the user wants
+ /************************************************************/
+ int countx = bounds[0]; //where second group starts
+ int county = bounds[0];
+
+ //go through the entire matrix
+ for (int x = 0; x < numSeqs; x++) {
+ for (int y = 0; y < numSeqs; y++) {
+ //if have not changed groups
+ if ((x < countx) && (y < county)) {
+ if (inUsersGroups(index[x].groupname, globaldata->Groups)) {
+ }
+ }
+ }
+ }
+
+
+
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
+
+/**************************************************************************/
+void FullMatrix::getBounds(int& higher, string group) {
+ try{
+ bool gotLower = false;
+
+ //for each group find bounds of subgroup/comparison
+ for (it = index.begin(); it != index.end(); it++) {
+ if (it->second.groupname == group) {
+ if (gotLower != true) { gotLower = true; }
+ }else if ((gotLower == true) && (it->second.groupname != group)) { higher = it->first; break; }
+ }
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+
+}
struct Names {
string groupname;
string seqName;
- float minDist;
};
int getNumSeqs();
void printMatrix(ostream&);
+ void getMinsForRowsVectors(); //requires globaldata->Groups to be filled
private:
void sortGroups(int, int); //this function sorts the sequences within the matrix.
+ void getBounds(int&, string);
void readSquareMatrix(ifstream&);
void readLTMatrix(ifstream&);
vector< vector<float> > matrix; //a 2D distance matrix of all the sequences and their distances to eachother.
+ vector< vector<float> > minsForRows; //vector< minimum distance for that subrow> -one for each comparison.
map<int, Names> index; // row in vector, sequence group. need to know this so when we sort it can be updated.
+ map<int, Names>::iterator it;
GroupMap* groupmap; //maps sequences to groups they belong to.
GlobalData* globaldata;
- int numSeqs;
+ int numSeqs, numGroups, numUserGroups;
bool square;
};
LibShuffCommand::LibShuffCommand(){
try {
- //globaldata = GlobalData::getInstance();
+ globaldata = GlobalData::getInstance();
+ convert(globaldata->getCutOff(), cutOff);
+ convert(globaldata->getIters(), iters);
+ matrix = globaldata->gMatrix;
+ coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
+ summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
+ openOutputFile(coverageFile, out);
+ openOutputFile(summaryFile, outSum);
+
+ //set the groups to be analyzed
+ setGroups();
+
+ //file headers for coverage file
+ out << "D" << '\t';
+ for (int i = 0; i < groupComb.size(); i++) {
+ out << "C" + groupComb[i] << '\t';
+ }
+
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ //don't output AA to AA
+ if (i != j) {
+ out << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+ }
+ }
+ }
+ out << endl;
+
+ numComp = numGroups*numGroups;
+
+ coverage = new Coverage();
}
catch(exception& e) {
//**********************************************************************************************************************
LibShuffCommand::~LibShuffCommand(){
-
+ delete coverage;
}
//**********************************************************************************************************************
int LibShuffCommand::execute(){
try {
+ //deltaValues[0] = scores for the difference between AA and AB.
+ //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
+
+ sumDelta.resize(numComp-numGroups, 0.0);
+
+ float D = 0.0;
+
+ /*****************************/
+ //get values for users matrix
+ /*****************************/
+ matrix->getMinsForRowsVectors();
+
+ //get values for users matrix
+ while (D <= cutOff) {
+ cValues = coverage->getValues(matrix, D);
+
+ //find delta values
+ int count = 0;
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ //don't save AA to AA
+ if (i != j) {
+ //(Caa - Cab)^2
+ deltaValues.push_back( (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])) );
+ sumDelta[count] += deltaValues[count];
+ count++;
+ }
+ }
+ }
+
+ D += 0.01;
+
+ printCoverageFile(D);
+
+ //clear out old Values
+ cValues.clear();
+ deltaValues.clear();
+ }
+
+ /*******************************************************************************/
+ //create and score random matrixes finding the sumDelta values for summary file
+ /******************************************************************************/
+
+ //initialize rsumDelta
+ rsumDelta.resize(numComp-numGroups);
+ for (int l = 0; l < rsumDelta.size(); l++) {
+ for (int w = 0; w < iters; w++) {
+ rsumDelta[l].push_back(0.0);
+ }
+ }
+
+ for (int m = 0; m < iters; m++) {
+ //generate random matrix in getValues
+ //values for random matrix
+ while (D <= cutOff) {
+ cValues = coverage->getValues(matrix, D);
+
+ //find delta values
+ int count = 0;
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ //don't save AA to AA
+ if (i != j) {
+ //(Caa - Cab)^2
+ rsumDelta[count][m] += (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]));
+ count++;
+ }
+ }
+ }
+
+ D += 0.01;
+
+ //clear out old Values
+ cValues.clear();
+ }
+ }
+
+ /**********************************************************/
+ //find the signifigance of the user matrix' sumdelta values
+ /**********************************************************/
+
+ for (int t = 0; t < rsumDelta.size(); t++) {
+ //sort rsumDelta t
+ sort(rsumDelta[t].begin(), rsumDelta[t].end());
+
+ //the index of the score higher than yours is returned
+ //so if you have 1000 random matrices the index returned is 100
+ //then there are 900 matrices with a score greater then you.
+ //giving you a signifigance of 0.900
+ int index = findIndex(sumDelta[t], t);
+
+ //the signifigance is the number of trees with the users score or higher
+ sumDeltaSig.push_back((iters-index)/(float)iters);
+
+ }
+
+ printSummaryFile();
+
+ //clear out users groups
+ globaldata->Groups.clear();
+
return 0;
}
catch(exception& e) {
exit(1);
}
}
+//**********************************************************************************************************************
+void LibShuffCommand::printCoverageFile(float d) {
+ try {
+ //format output
+ out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
+
+ out << setprecision(globaldata->getIters().length()) << d << '\t';
+
+ //print out coverage values
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ out << cValues[i][j] << '\t';
+ }
+ }
+
+ //print out delta values
+ for (int i = 0; i < deltaValues.size(); i++) {
+ out << deltaValues[i] << '\t';
+ }
+
+ out << endl;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the LibShuffCommand class function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+//**********************************************************************************************************************
+void LibShuffCommand::printSummaryFile() {
+ try {
+ //format output
+ outSum.setf(ios::fixed, ios::floatfield); outSum.setf(ios::showpoint);
+
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ //don't output AA to AA
+ if (i != j) {
+ outSum << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+ }
+ }
+ }
+ outSum << endl;
+
+ //print out delta values
+ for (int i = 0; i < sumDelta.size(); i++) {
+ outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+ }
+
+ outSum << endl;
+
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the LibShuffCommand class function printSummaryFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
//**********************************************************************************************************************
+void LibShuffCommand::setGroups() {
+ try {
+ //if the user has not entered specific groups to analyze then do them all
+ if (globaldata->Groups.size() == 0) {
+ numGroups = globaldata->gGroupmap->getNumGroups();
+ for (int i=0; i < numGroups; i++) {
+ globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
+ }
+ }else {
+ if (globaldata->getGroups() != "all") {
+ //check that groups are valid
+ for (int i = 0; i < globaldata->Groups.size(); i++) {
+ if (globaldata->gGroupmap->isValidGroup(globaldata->Groups[i]) != true) {
+ cout << globaldata->Groups[i] << " is not a valid group, and will be disregarded." << endl;
+ // erase the invalid group from globaldata->Groups
+ globaldata->Groups.erase (globaldata->Groups.begin()+i);
+ }
+ }
+
+ //if the user only entered invalid groups
+ if ((globaldata->Groups.size() == 0) || (globaldata->Groups.size() == 1)) {
+ numGroups = globaldata->gGroupmap->getNumGroups();
+ for (int i=0; i < numGroups; i++) {
+ globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
+ }
+ cout << "When using the groups parameter you must have at least 2 valid groups. I will run the command using all the groups in your groupfile." << endl;
+ }else { numGroups = globaldata->Groups.size(); }
+ }else { //users wants all groups
+ numGroups = globaldata->gGroupmap->getNumGroups();
+ globaldata->Groups.clear();
+ for (int i=0; i < numGroups; i++) {
+ globaldata->Groups.push_back(globaldata->gGroupmap->namesOfGroups[i]);
+ }
+ }
+ }
+
+ // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
+ for (int i=0; i<numGroups; i++) {
+ for (int l = 0; l < numGroups; l++) {
+ //set group comparison labels
+ groupComb.push_back(globaldata->Groups[i] + "-" + globaldata->Groups[l]);
+ }
+ }
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the LibShuffCommand class function setGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+/***********************************************************/
+int LibShuffCommand::findIndex(float score, int index) {
+ try{
+ for (int i = 0; i < rsumDelta[index].size(); i++) {
+ if (rsumDelta[index][i] >= score) { return i; }
+ }
+ return rsumDelta[index].size();
+ }
+ catch(exception& e) {
+ cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+ catch(...) {
+ cout << "An unknown error has occurred in the LibShuffCommand class function findIndex. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+ exit(1);
+ }
+}
+
+/***********************************************************/
+
*/
#include "command.hpp"
+#include "coverage.h"
+#include "fullmatrix.h"
using namespace std;
int execute();
private:
+ vector< vector<float> > cValues; // vector of coverage scores, one for each comparison.
+ vector<float> deltaValues; // vector of delta scores, one for each comparison.
+ vector<float> sumDelta; //sum of delta scores, one for each comparison.
+ vector<float> sumDeltaSig; //number of random matrixes with that delta value or ??
+ vector< vector<float> > rsumDelta; //vector< vector<sumdelta scores for a given comparison> >
+ vector<string> groupComb;
+
+
+ void setGroups();
+ int findIndex(float, int);
+ void printCoverageFile(float);
+ void printSummaryFile();
+
GlobalData* globaldata;
+ Coverage* coverage;
+ FullMatrix* matrix;
+ float cutOff;
+ int numGroups, numComp, iters;
+ string coverageFile, summaryFile;
+ ofstream out, outSum;
+
};