numUserGroups = globaldata->Groups.size();
numGroups = globaldata->gGroupmap->getNumGroups();
}
-//**********************************************************************************************************************
-void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data) {
- try {
- vector<float> min;
- vector<string> groups;
-
- //initialize data
- data.resize(numUserGroups);
- for (int l = 0; l < data.size(); l++) {
- data[l].push_back(0.0);
- }
- /**************************************/
- //get the minimums for each comparision
- /**************************************/
- int count = 0;
- int a = 0;
- int b = 0;
- for (int i = 0; i < numGroups; i++) {
- for (int j = 0; j < numGroups; j++) {
-
- //is this "box" one hte user wants analyzed?
- if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
-
- min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
-
- //find the coverage at this distance
- sort(min.begin(), min.end());
-//cout << "minvector for : " << globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[j] << endl;
-//for(int h = 0; h<min.size(); h++) {
-// cout << min[h] << " ";
-//}
-//cout << endl;
- int index = -1;
- //find index in min where value is higher than d
- for (int m = 0; m < min.size(); m++) {
- if (min[m] > d) { index = m; break; }
- }
-
- // if you don't find one than all the mins are less than d
- if (index == -1) { index = min.size(); }
-
- //save value in data
- data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
-//cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;
- if (b < numUserGroups-1) { b++; }
- else{ //you are moving to a new row of "boxes"
- b = 0;
- a++;
- }
- }
- count++;
- }
- }
- }
- catch(exception& e) {
- cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
- catch(...) {
- cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
- exit(1);
- }
-}
//**********************************************************************************************************************
-//For the random matrices
-void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data, string r) {
+void Coverage::getValues(FullMatrix* matrix, vector< vector< vector<float> > >& data, vector<float> dist, string mode) {
try {
vector<float> min;
vector<string> groups;
//initialize data
- data.resize(numUserGroups);
+ data.resize(dist.size());
for (int l = 0; l < data.size(); l++) {
- data[l].push_back(0.0);
+ data[l].resize(numGroups);
+ for (int k = 0; k < data[l].size(); k++) {
+ data[l][k].push_back(0.0);
+ }
}
/**************************************/
//is this "box" one hte user wants analyzed?
if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
-cout << "combo " << a << b << endl;
-cout << "original matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);
- //create random matrix for this comparison
- matrix->shuffle(count);
+
+ if (mode == "random") {
+ //create random matrix for this comparison
+ matrix->shuffle(globaldata->gGroupmap->namesOfGroups[i], globaldata->gGroupmap->namesOfGroups[j]);
+ }
min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
-cout << "shuffled matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);
//find the coverage at this distance
sort(min.begin(), min.end());
- int index = -1;
- //find index in min where value is higher than d
- for (int m = 0; m < min.size(); m++) {
- if (min[m] > d) { index = m; break; }
- }
+ //loop through each distance and fill data
+ for (int k = 0; k < data.size(); k++) {
+
+ int index = -1;
+ //find index in min where value is higher than d
+ for (int m = 0; m < min.size(); m++) {
+ if (min[m] > dist[k]) { index = m; break; }
+ }
- // if you don't find one than all the mins are less than d
- if (index == -1) { index = min.size(); }
+ // if you don't find one than all the mins are less than d
+ if (index == -1) { index = min.size(); }
- //save value in data
- data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
-cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;
+ //save value in data
+ data[k][a][b] = 1.0 - ((min.size()-index)/(float)min.size());
+
+ }
+
+ //move to next box
if (b < numUserGroups-1) { b++; }
else{ //you are moving to a new row of "boxes"
b = 0;
a++;
}
+
+ count++;
+
+ if (mode == "random") {
+ //restore matrix to original form for next shuffle
+ matrix->restore();
+ }
}
- count++;
-
- //restore matrix to original form for next shuffle
- matrix->restore();
-min = matrix->getMins(count-1);
-cout << "restored matrix mins4rows " << endl;
-matrix->printMinsForRows(cout);
}
}
+
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
public:
Coverage();
~Coverage(){};
- void getValues(FullMatrix*, float, vector< vector<float> >&);
- void getValues(FullMatrix*, float, vector< vector<float> >&, string); //for random matrices
+ void getValues(FullMatrix*, vector< vector< vector<float> > >&, vector<float>, string); //matrix, container for results, vector of distances, mode
private:
GlobalData* globaldata;
for (int i = 0; i < numSeqs; i++) {
out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
for (int j = 0; j < numSeqs; j++) {
- //out << matrix[i][j] << " ";
+ out << matrix[i][j] << " ";
}
out << endl;
}
}
}
+
/**************************************************************************/
//shuffles the sequences in the 2 groups passed in.
-void FullMatrix::shuffle(int box){
+void FullMatrix::shuffle(string groupA, string groupB){
try{
vector<int> rows2Swap;
vector<int> shuffled;
float y = 0;
string name = "";
- /****************************/
- //find the box the user wants
- /****************************/
- int count = 0;
- int lowBoundy = bounds[0]; //where first group starts
- int highBoundy = bounds[1]; //where second group starts
- int county = 1; //index in bound
-
- //find the bounds for the box the user wants
- for (int i = 0; i < (numGroups * numGroups); i++) {
-
- //are you at the box?
- if (count == box) { break; }
- else { count++; }
- //move to next box
- if (county < numGroups) {
- county++;
- highBoundy = bounds[county];
- lowBoundy = bounds[county-1];
- }else{ //you are moving to a new row of "boxes"
- county = 1;
- highBoundy = bounds[county];
- lowBoundy = bounds[county-1];
- }
- }
-
- /************************/
- //save its rows locations
- /************************/
+ /********************************/
+ //save rows you want to randomize
+ /********************************/
//go through the matrix map to find the rows from groups you want to randomize
- for (int y = lowBoundy; y < highBoundy; y++) {
- rows2Swap.push_back(y);
- shuffled.push_back(y);
+ for (it = index.begin(); it != index.end(); it++) {
+ //is this row from group A or B?
+ if ((it->second.groupname == groupA) || (it->second.groupname == groupB)) {
+ rows2Swap.push_back(it->first);
+ shuffled.push_back(it->first);
+ }
}
//randomize rows to shuffle in shuffled
//swap rows and columns to randomize box
/***************************************/
for (int i = 0; i < shuffled.size(); i++) {
+
//record the swaps you are making so you can undo them in restore function
restoreIndex[i].a = shuffled[i];
restoreIndex[i].b = rows2Swap[i];
name = index[shuffled[i]].seqName;
index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
index[rows2Swap[i]].seqName = name;
+
}
}
catch(exception& e) {
//reverse iterate through swaps and undo them to restore original matrix and index map.
for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
/* swap rows */
+
for (int h = 0; h < numSeqs; h++) {
y = matrix[it2->second.a][h];
matrix[it2->second.a][h] = matrix[it2->second.b][h];
name = index[it2->second.a].seqName;
index[it2->second.a].seqName = index[it2->second.b].seqName;
index[it2->second.b].seqName = name;
+
}
//clear restore for next shuffle
void setBounds(); //requires globaldata->Groups to be filled
vector<float> getMins(int); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
void getDist(vector<float>&); //fills a vector with the valid distances for the integral form.
- void shuffle(int); //shuffles the sequences in the box passed in.
+ void shuffle(string, string); //shuffles the sequences in the groups passed in.
void restore(); //unshuffles the matrix.
-
-
- void printMinsForRows(ostream&);
+
private:
void sortGroups(int, int); //this function sorts the sequences within the matrix.
void getBounds(int&, string);
void readSquareMatrix(ifstream&);
void readLTMatrix(ifstream&);
+ void printMinsForRows(ostream&);
map<int, Names> index; // row in vector, sequence group. need to know this so when we sort it can be updated.
map<int, Swap> restoreIndex; //a map of the swaps made so you can undo them in restore.
int HelpCommand::execute(){
if (globaldata->helpRequest == "read.dist") {
- cout << "The read.dist command parameter options are phylip or column, name, cutoff and precision" << "\n";
- cout << "The read.dist command should be in the following format: " << "\n";
+ cout << "The read.dist command parameter options are phylip or column, group, name, cutoff and precision" << "\n";
+ cout << "The read.dist command must be run before using the cluster or libshuff commands" << "\n";
+ cout << "The read.dist command can be used in two ways. The first is to read a phylip or column and run the cluster command" << "\n";
+ cout << "For this use the read.dist command should be in the following format: " << "\n";
cout << "read.dist(phylip=yourDistFile, name=yourNameFile, cutoff=yourCutoff, precision=yourPrecision) " << "\n";
cout << "The phylip or column parameter is required, but only one may be used. If you use a column file the name filename is required. " << "\n";
cout << "If you do not provide a cutoff value 10.00 is assumed. If you do not provide a precision value then 100 is assumed." << "\n";
- cout << "Note: No spaces between parameter labels (i.e. dist), '=' and parameters (i.e.yourDistfile)." << "\n" << "\n";
+ cout << "The second way to use the read.dist command is to read a phylip or column and a group, so you can use the libshuff command." << "\n";
+ cout << "For this use the read.dist command should be in the following format: " << "\n";
+ cout << "read.dist(phylip=yourPhylipfile, group=yourGroupFile). The cutoff and precision parameters are not valid with this use. " << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. phylip), '=' and parameters (i.e.yourPhylipfile)." << "\n" << "\n";
}else if (globaldata->helpRequest == "read.otu") {
cout << "The read.otu command must be run before you execute a collect.single, rarefaction.single, summary.single, " << "\n";
cout << "collect.shared, rarefaction.shared or summary.shared command. Mothur will generate a .list, .rabund and .sabund upon completion of the cluster command " << "\n";
cout << "The default value for groups is all the groups in your groupfile, and iters is 1000." << "\n";
cout << "The unifrac.unweighted command output two files: .unweighted and .uwsummary their descriptions are in the manual." << "\n";
cout << "Note: No spaces between parameter labels (i.e. list), '=' and parameters (i.e.yourListfile)." << "\n" << "\n";
+ }else if (globaldata->helpRequest == "libshuff") {
+ cout << "The libshuff command can only be executed after a successful read.dist command." << "\n";
+ cout << "The libshuff command parameters are groups, iters, step, form and cutoff. No parameters are required." << "\n";
+ cout << "The groups parameter allows you to specify which of the groups in your groupfile you would like analyzed. You must enter at least 2 valid groups." << "\n";
+ cout << "The group names are separated by dashes. The iters parameter allows you to specify how many random matrices you would like compared to your matrix." << "\n";
+ cout << "The step parameter allows you to specify change in distance you would like between each output if you are using the discrete form." << "\n";
+ cout << "The form parameter allows you to specify if you would like to analyze your matrix using the discrete or integral form. Your options are integral or discrete." << "\n";
+ cout << "The libshuff command should be in the following format: libshuff(groups=yourGroups, iters=yourIters, cutOff=yourCutOff, form=yourForm, step=yourStep)." << "\n";
+ cout << "Example libshuff(groups=A-B-C, iters=500, form=discrete, step=0.01, cutOff=2.0)." << "\n";
+ cout << "The default value for groups is all the groups in your groupfile, iters is 10000, cutoff is 1.0, form is integral and step is 0.01." << "\n";
+ cout << "The libshuff command output two files: .coverage and .slsummary their descriptions are in the manual." << "\n";
+ cout << "Note: No spaces between parameter labels (i.e. iters), '=' and parameters (i.e.yourIters)." << "\n" << "\n";
}else if (globaldata->helpRequest == "quit") {
cout << "The quit command will terminate Dotur and should be in the following format: " << "\n";
cout << "quit()" << "\n" << "\n";
int LibShuffCommand::execute(){
try {
//deltaValues[0] = scores for the difference between AA and AB.
- //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
- vector<float> dist;
- int next;
+ //cValues[0][0][0] = AA at distance 0.0, cValues[0][0][1] = AB at distance 0.0, cValues[0][0][2] = AC at distance 0.0, cValues[0][1][0] = BA at distance 0.0, cValues[0][1][1] = BB...
sumDelta.resize(numComp-numGroups, 0.0);
- float D = 0.0;
-
- /*****************************/
- //get values for users matrix
- /*****************************/
matrix->setBounds();
- if (form != "discrete") { matrix->getDist(dist); next = 1; }
-//cout << "Distances" << endl;
-//for (int i = 0; i < dist.size(); i++) { cout << dist[i] << " "; }
-//cout << endl;
+ //load distances
+ if (form != "discrete") { matrix->getDist(dist); }
+ else {
+ float f = 0.0;
+ while (f <= cutOff) {
+ dist.push_back(f);
+ f += step;
+ }
+ }
+ /*****************************/
//get values for users matrix
- while (D <= cutOff) {
- //clear out old Values
- deltaValues.clear();
- coverage->getValues(matrix, D, cValues);
+ /*****************************/
+ //clear out old Values
+ deltaValues.clear();
+ deltaValues.resize(dist.size());
+
+ coverage->getValues(matrix, cValues, dist, "user");
+
+ //loop through each distance and load rsumdelta
+ for (int p = 0; p < cValues.size(); p++) {
//find delta values
int count = 0;
for (int i = 0; i < numGroups; i++) {
//don't save AA to AA
if (i != j) {
//(Caa - Cab)^2
- deltaValues.push_back( (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])) );
- sumDelta[count] += deltaValues[count];
+ deltaValues[p].push_back( (abs(cValues[p][i][i]-cValues[p][i][j]) * abs(cValues[p][i][i]-cValues[p][i][j])) );
+ sumDelta[count] += deltaValues[p][count];
count++;
}
}
}
+ }
- printCoverageFile(D);
-
- //check form
- if (form != "discrete") {
- if (next == dist.size()) { break; }
- else { D = dist[next]; next++; }
- }else { D += step; }
+ printCoverageFile();
-
- }
-
- //output sum Deltas
- for (int i = 0; i < numGroups; i++) {
- for (int j = 0; j < numGroups; j++) {
- //don't output AA to AA
- if (i != j) {
- cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
- }
- }
- }
- cout << endl;
-
- for (int i = 0; i < sumDelta.size(); i++) {
- cout << setprecision(6) << sumDelta[i] << '\t';
- }
- cout << endl;
-
/*******************************************************************************/
//create and score random matrixes finding the sumDelta values for summary file
/******************************************************************************/
for (int m = 0; m < iters; m++) {
//generate random matrix in getValues
//values for random matrix
- cout << "Iteration " << m+1 << endl;
- D = 0.0;
- next = 1;
-
- while (D <= cutOff) {
- coverage->getValues(matrix, D, cValues, "random");
+
+ coverage->getValues(matrix, cValues, dist, "random");
+ //loop through each distance and load rsumdelta
+ for (int p = 0; p < cValues.size(); p++) {
//find delta values
int count = 0;
for (int i = 0; i < numGroups; i++) {
//don't save AA to AA
if (i != j) {
//(Caa - Cab)^2
- rsumDelta[count][m] += ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])));
-//cout << "iter " << m << " box " << i << j << " delta = " << ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))) << endl;
+ rsumDelta[count][m] += ((abs(cValues[p][i][i]-cValues[p][i][j]) * abs(cValues[p][i][i]-cValues[p][i][j])));
count++;
}
}
}
-
- //check form
- if (form != "discrete") {
- if (next == dist.size()) { break; }
- else { D = dist[next]; next++; }
- }else { D += step; }
-
-
- //clear out old Values
- cValues.clear();
+
}
-cout << "random sum delta for iter " << m << endl;
-for (int i = 0; i < rsumDelta.size(); i++) {
- cout << setprecision(6) << rsumDelta[i][m] << '\t';
-}
-cout << endl;
+//cout << "iter " << m << endl;
+ //clear out old Values
+ cValues.clear();
+
+//cout << "random sum delta for iter " << m << endl;
+//for (int i = 0; i < rsumDelta.size(); i++) {
+// cout << setprecision(6) << rsumDelta[i][m] << '\t';
+//}
+//cout << endl;
}
+
/**********************************************************/
//find the signifigance of the user matrix' sumdelta values
/**********************************************************/
}
}
//**********************************************************************************************************************
-void LibShuffCommand::printCoverageFile(float d) {
+void LibShuffCommand::printCoverageFile() {
try {
//format output
out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
- out << setprecision(6) << d << '\t';
-
- //print out coverage values
- for (int i = 0; i < numGroups; i++) {
- for (int j = 0; j < numGroups; j++) {
- out << cValues[i][j] << '\t';
+ //loop through each distance
+ for (int p = 0; p < cValues.size(); p++) {
+ out << setprecision(6) << dist[p] << '\t';
+ //print out coverage values
+ for (int i = 0; i < numGroups; i++) {
+ for (int j = 0; j < numGroups; j++) {
+ out << cValues[p][i][j] << '\t';
+ }
}
+
+ for (int h = 0; h < deltaValues[p].size(); h++) {
+ out << deltaValues[p][h] << '\t';
+ }
+
+ out << endl;
}
- //print out delta values
- for (int i = 0; i < deltaValues.size(); i++) {
- out << deltaValues[i] << '\t';
- }
-
- out << endl;
-
}
catch(exception& e) {
cout << "Standard Error: " << e.what() << " has occurred in the LibShuffCommand class Function printCoverageFile. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
//print out delta values
for (int i = 0; i < sumDelta.size(); i++) {
- outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
- cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+ if (sumDeltaSig[i] > (1/(float)iters)) {
+ outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+ cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+ }else {
+ outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+ cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << "<" << (1/float(iters)) << '\t';
+ }
}
outSum << endl;
int execute();
private:
- vector< vector<float> > cValues; // vector of coverage scores, one for each comparison.
- vector<float> deltaValues; // vector of delta scores, one for each comparison.
+ vector< vector< vector<float> > > cValues; // vector<vector of coverage scores, one for each comparison.> -one for each distance level.
+ vector< vector<float> > deltaValues; // vector< vector of delta scores, one for each comparison.> -one at each distance
vector<float> sumDelta; //sum of delta scores, one for each comparison.
vector<float> sumDeltaSig; //number of random matrixes with that delta value or ??
vector< vector<float> > rsumDelta; //vector< vector<sumdelta scores for a given comparison> >
vector<string> groupComb;
+ vector<float> dist;
void setGroups();
int findIndex(float, int);
- void printCoverageFile(float);
+ void printCoverageFile();
void printSummaryFile();
GlobalData* globaldata;