From 2d2fbc80f9359b19873ba3e63970b58f4f8f49f3 Mon Sep 17 00:00:00 2001 From: westcott Date: Wed, 18 Mar 2009 16:05:36 +0000 Subject: [PATCH] working on libshuff --- commandfactory.cpp | 8 +- coverage.cpp | 130 +++++++++++++++++- coverage.h | 8 +- errorchecking.cpp | 17 ++- fullmatrix.cpp | 312 ++++++++++++++++++++++++++++++++++++++++---- fullmatrix.h | 25 +++- globaldata.cpp | 21 ++- globaldata.hpp | 4 +- libshuffcommand.cpp | 83 +++++++++--- libshuffcommand.h | 4 +- validcommands.cpp | 1 + validparameter.cpp | 2 + 12 files changed, 548 insertions(+), 67 deletions(-) diff --git a/commandfactory.cpp b/commandfactory.cpp index a5ec5f1..1902204 100644 --- a/commandfactory.cpp +++ b/commandfactory.cpp @@ -30,6 +30,7 @@ #include "parsimonycommand.h" #include "unifracunweightedcommand.h" #include "unifracweightedcommand.h" +#include "libshuffcommand.h" #include "mothur.h" @@ -71,9 +72,10 @@ Command* CommandFactory::getCommand(string commandName){ else if(commandName == "summary.shared") { command = new SummarySharedCommand(); } else if(commandName == "unifrac.weighted") { command = new UnifracWeightedCommand(); } else if(commandName == "unifrac.unweighted") { command = new UnifracUnweightedCommand(); } - else if(commandName == "get.group") { command = new GetgroupCommand(); } - else if(commandName == "get.label") { command = new GetlabelCommand(); } - else if(commandName == "get.line") { command = new GetlineCommand(); } + else if(commandName == "get.group") { command = new GetgroupCommand(); } + else if(commandName == "get.label") { command = new GetlabelCommand(); } + else if(commandName == "get.line") { command = new GetlineCommand(); } + else if(commandName == "libshuff") { command = new LibShuffCommand(); } else { command = new NoCommand(); } return command; diff --git a/coverage.cpp b/coverage.cpp index e7baf00..ba2167a 100644 --- a/coverage.cpp +++ b/coverage.cpp @@ -10,21 +10,139 @@ #include "coverage.h" //********************************************************************************************************************** -vector< vector > Coverage::getValues(FullMatrix*, float) { - try { +Coverage::Coverage() { globaldata = GlobalData::getInstance(); - numGroups = globaldata->Groups.size(); + numUserGroups = globaldata->Groups.size(); + numGroups = globaldata->gGroupmap->getNumGroups(); +} +//********************************************************************************************************************** +void Coverage::getValues(FullMatrix* matrix, float d, vector< vector >& data) { + try { + vector min; + vector groups; //initialize data - data.resize(numGroups); + data.resize(numUserGroups); for (int l = 0; l < data.size(); l++) { data[l].push_back(0.0); } /**************************************/ - //get the minumums for each comparision + //get the minimums for each comparision /**************************************/ - return data; + int count = 0; + int a = 0; + int b = 0; + for (int i = 0; i < numGroups; i++) { + for (int j = 0; j < numGroups; j++) { + + //is this "box" one hte user wants analyzed? + if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { + + min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; + + //find the coverage at this distance + sort(min.begin(), min.end()); +//cout << "minvector for : " << globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[j] << endl; +//for(int h = 0; h d) { index = m; break; } + } + + // if you don't find one than all the mins are less than d + if (index == -1) { index = min.size(); } + + //save value in data + data[a][b] = 1.0 - ((min.size()-index)/(float)min.size()); +//cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl; + if (b < numUserGroups-1) { b++; } + else{ //you are moving to a new row of "boxes" + b = 0; + a++; + } + } + count++; + } + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +//********************************************************************************************************************** +//For the random matrices +void Coverage::getValues(FullMatrix* matrix, float d, vector< vector >& data, string r) { + try { + vector min; + vector groups; + + //initialize data + data.resize(numUserGroups); + for (int l = 0; l < data.size(); l++) { + data[l].push_back(0.0); + } + + /**************************************/ + //get the minimums for each comparision + /**************************************/ + int count = 0; + int a = 0; + int b = 0; + for (int i = 0; i < numGroups; i++) { + for (int j = 0; j < numGroups; j++) { + + //is this "box" one hte user wants analyzed? + if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) { +cout << "combo " << a << b << endl; +cout << "original matrix mins4rows " << endl; +matrix->printMinsForRows(cout); + //create random matrix for this comparison + matrix->shuffle(count); + + min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; +cout << "shuffled matrix mins4rows " << endl; +matrix->printMinsForRows(cout); + + //find the coverage at this distance + sort(min.begin(), min.end()); + + int index = -1; + //find index in min where value is higher than d + for (int m = 0; m < min.size(); m++) { + if (min[m] > d) { index = m; break; } + } + + // if you don't find one than all the mins are less than d + if (index == -1) { index = min.size(); } + + //save value in data + data[a][b] = 1.0 - ((min.size()-index)/(float)min.size()); +cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl; + if (b < numUserGroups-1) { b++; } + else{ //you are moving to a new row of "boxes" + b = 0; + a++; + } + } + count++; + + //restore matrix to original form for next shuffle + matrix->restore(); +min = matrix->getMins(count-1); +cout << "restored matrix mins4rows " << endl; +matrix->printMinsForRows(cout); + } + } } catch(exception& e) { cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; diff --git a/coverage.h b/coverage.h index c556bb2..7e64300 100644 --- a/coverage.h +++ b/coverage.h @@ -21,14 +21,14 @@ using namespace std; class Coverage { public: - Coverage(){}; + Coverage(); ~Coverage(){}; - vector< vector > getValues(FullMatrix*, float); + void getValues(FullMatrix*, float, vector< vector >&); + void getValues(FullMatrix*, float, vector< vector >&, string); //for random matrices private: GlobalData* globaldata; - vector< vector > data; - int numGroups; + int numGroups, numUserGroups; }; diff --git a/errorchecking.cpp b/errorchecking.cpp index 430729d..397bbd2 100644 --- a/errorchecking.cpp +++ b/errorchecking.cpp @@ -355,7 +355,9 @@ bool ErrorCheck::checkInput(string input) { }else if (listfile != "") { //you want to do single commands validateReadFiles(); validateReadPhil(); - }else {//you are reading a shared file + }else if ((listfile == "") && (sharedfile == "")) { + cout << "You must enter either a listfile or a sharedfile with the read.otu command. " << endl; return false; + }else{//you are reading a shared file validateReadFiles(); } }else if (commandName == "read.tree") { @@ -372,6 +374,10 @@ bool ErrorCheck::checkInput(string input) { errorFree = false; } + if ((commandName == "libshuff") && (globaldata->gMatrix == NULL)) { + cout << "You must read in a matrix before you use the libshuff command. " << endl; return false; + } + if (commandName == "parsimony") { //are you trying to use parsimony without reading a tree or saying you want random distribution if (randomtree == "") { @@ -429,7 +435,7 @@ void ErrorCheck::validateReadFiles() { //unable to open if (ableToOpen == 1) { errorFree = false; } else { globaldata->inputFileName = phylipfile; } - //are we reading a phylipfile + //are we reading a columnfile }else if (columnfile != "") { ableToOpen = openInputFile(columnfile, filehandle); filehandle.close(); @@ -600,6 +606,13 @@ void ErrorCheck::validateReadDist() { ifstream filehandle; int ableToOpen; + if (groupfile != "") { + ableToOpen = openInputFile(groupfile, filehandle); + filehandle.close(); + //unable to open + if (ableToOpen == 1) { errorFree = false; } + } + if ((phylipfile == "") && (columnfile == "")) { cout << "When executing a read.dist you must enter a phylip or a column." << endl; errorFree = false; } else if ((phylipfile != "") && (columnfile != "")) { cout << "When executing a read.dist you must enter ONLY ONE of the following: phylip or column." << endl; errorFree = false; } diff --git a/fullmatrix.cpp b/fullmatrix.cpp index 6151a65..6c09639 100644 --- a/fullmatrix.cpp +++ b/fullmatrix.cpp @@ -78,7 +78,6 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) { reading = new Progress("Reading matrix: ", numSeqs * numSeqs); int count = 0; - float distance; string group, name; @@ -92,9 +91,8 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) { if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); } for(int j=0;j> distance; - - matrix[i][j] = distance; + filehandle >> matrix[i][j]; + count++; reading->update(count); } @@ -161,7 +159,7 @@ void FullMatrix::sortGroups(int low, int high){ int i = low; int j = high; - int y = 0; + float y = 0; string name; /* compare value */ @@ -235,7 +233,7 @@ void FullMatrix::printMatrix(ostream& out) { for (int i = 0; i < numSeqs; i++) { out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl; for (int j = 0; j < numSeqs; j++) { - out << matrix[i][j] << " "; + //out << matrix[i][j] << " "; } out << endl; } @@ -252,57 +250,105 @@ void FullMatrix::printMatrix(ostream& out) { } /**************************************************************************/ -void FullMatrix::getMinsForRowsVectors(){ +void FullMatrix::setBounds(){ try{ numGroups = globaldata->gGroupmap->namesOfGroups.size(); //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end()); + //one for each comparision + //minsForRows.resize(numGroups*numGroups); + /*************************************************/ //find where in matrix each group starts and stops /*************************************************/ - vector bounds; //bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find upper bound of C because its numSeqs. bounds.resize(numGroups); bounds[0] = 0; - bounds[numGroups] = numSeqs-1; + bounds[numGroups] = numSeqs; + //for each group find bounds of subgroup/comparison for (int i = 1; i < numGroups; i++) { - getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i]); + getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]); } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} +/**************************************************************************/ +vector FullMatrix::getMins(int x) { + try{ + //clear out old data + minsForRows.clear(); + /************************************************************/ - //fill the minsForRows vectors for each group the user wants + //fill the minsForRows vector for the box the user wants /************************************************************/ - int countx = bounds[1]; //where second group starts - int county = bounds[1]; + int count = 0; + int lowBoundx = bounds[0]; //where first group starts + int lowBoundy = bounds[0]; + int highBoundx = bounds[1]; //where second group starts + int highBoundy = bounds[1]; - //go through the entire matrix - for (int x = 0; x < numSeqs; x++) { - for (int y = 0; y < numSeqs; y++) { - //if have not changed groups - if ((x < countx) && (y < county)) { - - } + int countx = 1; //index in bound + int county = 1; //index in bound + + //find the bounds for the box the user wants + for (int i = 0; i < (numGroups * numGroups); i++) { + + //are you at the box? + if (count == x) { break; } + else { count++; } + + //move to next box + if (county < numGroups) { + county++; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; + }else{ //you are moving to a new row of "boxes" + county = 1; + countx++; + highBoundx = bounds[countx]; + lowBoundx = bounds[countx-1]; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; } } - + //each row in the box + for (int x = lowBoundx; x < highBoundx; x++) { + float min4Row = 100000.0; + //each entry in that row + for (int y = lowBoundy; y < highBoundy; y++) { + //if you are not on the diagonal and you are less than previous minimum + if ((x != y) && (matrix[x][y] < min4Row)) { + min4Row = matrix[x][y]; + } + } + //save minimum value for that row in minsForRows vector of vectors + minsForRows.push_back(min4Row); + } - + return minsForRows; } catch(exception& e) { - cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } catch(...) { - cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; exit(1); } - } - /**************************************************************************/ void FullMatrix::getBounds(int& higher, string group) { try{ @@ -311,7 +357,7 @@ void FullMatrix::getBounds(int& higher, string group) { //for each group find bounds of subgroup/comparison for (it = index.begin(); it != index.end(); it++) { if (it->second.groupname == group) { - if (gotLower != true) { gotLower = true; } + gotLower = true; }else if ((gotLower == true) && (it->second.groupname != group)) { higher = it->first; break; } } @@ -327,3 +373,215 @@ void FullMatrix::getBounds(int& higher, string group) { } +/**************************************************************************/ +//print out matrix +void FullMatrix::printMinsForRows(ostream& out) { + try{ + for (int j = 0; j < minsForRows.size(); j++) { + out << minsForRows[j] << " "; + } + out << endl; + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + +} +/**************************************************************************/ +//shuffles the sequences in the 2 groups passed in. +void FullMatrix::shuffle(int box){ + try{ + vector rows2Swap; + vector shuffled; + float y = 0; + string name = ""; + + /****************************/ + //find the box the user wants + /****************************/ + int count = 0; + int lowBoundy = bounds[0]; //where first group starts + int highBoundy = bounds[1]; //where second group starts + int county = 1; //index in bound + + //find the bounds for the box the user wants + for (int i = 0; i < (numGroups * numGroups); i++) { + + //are you at the box? + if (count == box) { break; } + else { count++; } + + //move to next box + if (county < numGroups) { + county++; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; + }else{ //you are moving to a new row of "boxes" + county = 1; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; + } + } + + /************************/ + //save its rows locations + /************************/ + //go through the matrix map to find the rows from groups you want to randomize + for (int y = lowBoundy; y < highBoundy; y++) { + rows2Swap.push_back(y); + shuffled.push_back(y); + } + + //randomize rows to shuffle in shuffled + random_shuffle(shuffled.begin(), shuffled.end()); + + /***************************************/ + //swap rows and columns to randomize box + /***************************************/ + for (int i = 0; i < shuffled.size(); i++) { + //record the swaps you are making so you can undo them in restore function + restoreIndex[i].a = shuffled[i]; + restoreIndex[i].b = rows2Swap[i]; + + /* swap rows*/ + for (int h = 0; h < numSeqs; h++) { + y = matrix[shuffled[i]][h]; + matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h]; + matrix[rows2Swap[i]][h] = y; + } + + /* swap columns */ + for (int b = 0; b < numSeqs; b++) { + y = matrix[b][shuffled[i]]; + matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]]; + matrix[b][rows2Swap[i]] = y; + } + + //swap map elements + name = index[shuffled[i]].seqName; + index[shuffled[i]].seqName = index[rows2Swap[i]].seqName; + index[rows2Swap[i]].seqName = name; + } + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************/ +//unshuffles the matrix. +void FullMatrix::restore(){ + try{ + float y = 0; + string name = ""; + + //reverse iterate through swaps and undo them to restore original matrix and index map. + for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) { + /* swap rows */ + for (int h = 0; h < numSeqs; h++) { + y = matrix[it2->second.a][h]; + matrix[it2->second.a][h] = matrix[it2->second.b][h]; + matrix[it2->second.b][h] = y; + } + + /* swap columns */ + for (int b = 0; b < numSeqs; b++) { + y = matrix[b][it2->second.a]; + matrix[b][it2->second.a] = matrix[b][it2->second.b]; + matrix[b][it2->second.b] = y; + } + + + //swap map elements + name = index[it2->second.a].seqName; + index[it2->second.a].seqName = index[it2->second.b].seqName; + index[it2->second.b].seqName = name; + } + + //clear restore for next shuffle + restoreIndex.clear(); + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} +/**************************************************************************/ +void FullMatrix::getDist(vector& distances) { + try{ + map dist; //holds the distances for the integral form + map::iterator it; + + /************************************************************/ + //fill the minsForRows vectors for each group the user wants + /************************************************************/ + int lowBoundx = bounds[0]; //where first group starts + int lowBoundy = bounds[0]; + int highBoundx = bounds[1]; //where second group starts + int highBoundy = bounds[1]; + + int countx = 1; //index in bound + int county = 1; //index in bound + + //go through each "box" in the matrix + for (int i = 0; i < (numGroups * numGroups); i++) { + //each row in the box + for (int x = lowBoundx; x < highBoundx; x++) { + float min4Row = 100000.0; + //each entry in that row + for (int y = lowBoundy; y < highBoundy; y++) { + //if you are not on the diagonal and you are less than previous minimum + if ((x != y) && (matrix[x][y] < min4Row)){ + min4Row = matrix[x][y]; + } + } + //save minimum value + dist[min4Row] = min4Row; + } + + //****** reset bounds to process next "box" ******** + //if you still have more "boxes" in that row + if (county < numGroups) { + county++; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; + }else{ //you are moving to a new row of "boxes" + county = 1; + countx++; + highBoundx = bounds[countx]; + lowBoundx = bounds[countx-1]; + highBoundy = bounds[county]; + lowBoundy = bounds[county-1]; + } + } + + //store distances in users vector + for (it = dist.begin(); it != dist.end(); it++) { + distances.push_back(it->first); + } + + } + catch(exception& e) { + cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } + catch(...) { + cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n"; + exit(1); + } +} + diff --git a/fullmatrix.h b/fullmatrix.h index 43bde96..b744490 100644 --- a/fullmatrix.h +++ b/fullmatrix.h @@ -21,6 +21,11 @@ struct Names { string seqName; }; +struct Swap { + int a; + int b; +}; + class FullMatrix { @@ -31,17 +36,29 @@ class FullMatrix { int getNumSeqs(); void printMatrix(ostream&); - void getMinsForRowsVectors(); //requires globaldata->Groups to be filled - + void setBounds(); //requires globaldata->Groups to be filled + vector getMins(int); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB; + void getDist(vector&); //fills a vector with the valid distances for the integral form. + void shuffle(int); //shuffles the sequences in the box passed in. + void restore(); //unshuffles the matrix. + + + void printMinsForRows(ostream&); private: void sortGroups(int, int); //this function sorts the sequences within the matrix. void getBounds(int&, string); void readSquareMatrix(ifstream&); void readLTMatrix(ifstream&); - vector< vector > matrix; //a 2D distance matrix of all the sequences and their distances to eachother. - vector< vector > minsForRows; //vector< minimum distance for that subrow> -one for each comparison. + map index; // row in vector, sequence group. need to know this so when we sort it can be updated. + map restoreIndex; //a map of the swaps made so you can undo them in restore. map::iterator it; + map::reverse_iterator it2; + + vector< vector > matrix; //a 2D distance matrix of all the sequences and their distances to eachother. + vector minsForRows; //vector< minimum distance for that subrow> - one for each comparison. + vector bounds; //bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find upper bound of C because its numSeqs. + GroupMap* groupmap; //maps sequences to groups they belong to. GlobalData* globaldata; int numSeqs, numGroups, numUserGroups; diff --git a/globaldata.cpp b/globaldata.cpp index 17b4908..a0ccdcb 100644 --- a/globaldata.cpp +++ b/globaldata.cpp @@ -39,6 +39,11 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ helpRequest = optionText; } + if (commandName == "libshuff") { + iters = "10000"; + cutoff = "1.0"; + } + string key, value; //reads in parameters and values if((optionText != "") && (commandName != "help")){ @@ -66,7 +71,10 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "fileroot" ) { fileroot = value; } if (key == "abund" ) { abund = value; } if (key == "random" ) { randomtree = value; } - if (key == "calc") { calc = value; } + if (key == "calc") { calc = value; } + if (key == "step") { step = value; } + if (key == "form") { form = value; } + if (key == "line") {//stores lines to be used in a set @@ -116,7 +124,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){ if (key == "abund" ) { abund = value; } if (key == "random" ) { randomtree = value; } if (key == "calc") { calc = value; } - + if (key == "step") { step = value; } + if (key == "form") { form = value; } if (key == "line") {//stores lines to be used in a vector lines.clear(); @@ -218,6 +227,8 @@ string GlobalData::getFreq() { return freq; } string GlobalData::getAbund() { return abund; } string GlobalData::getRandomTree() { return randomtree; } string GlobalData::getGroups() { return groups; } +string GlobalData::getStep() { return step; } +string GlobalData::getForm() { return form; } void GlobalData::setListFile(string file) { listfile = file; inputFileName = file;} void GlobalData::setRabundFile(string file) { rabundfile = file; inputFileName = file;} void GlobalData::setSabundFile(string file) { sabundfile = file; inputFileName = file;} @@ -266,6 +277,8 @@ void GlobalData::clear() { method = "furthest"; fileroot = ""; abund = "10"; + step = "0.01"; + form = "integral"; } //*******************************************************/ @@ -281,7 +294,9 @@ void GlobalData::reset() { freq = "100"; method = "furthest"; calc = ""; - abund = "10"; + abund = "10"; + step = "0.01"; + form = "integral"; } /*******************************************************/ diff --git a/globaldata.hpp b/globaldata.hpp index 6070dd7..16bc8d3 100644 --- a/globaldata.hpp +++ b/globaldata.hpp @@ -60,6 +60,8 @@ public: string getAbund(); string getRandomTree(); string getGroups(); + string getStep(); + string getForm(); void setListFile(string); void setPhylipFile(string); @@ -80,7 +82,7 @@ public: private: string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups; - string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund; + string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form; static GlobalData* _uniqueInstance; GlobalData( const GlobalData& ); // Disable copy constructor diff --git a/libshuffcommand.cpp b/libshuffcommand.cpp index a34fb36..bd01662 100644 --- a/libshuffcommand.cpp +++ b/libshuffcommand.cpp @@ -17,6 +17,8 @@ LibShuffCommand::LibShuffCommand(){ globaldata = GlobalData::getInstance(); convert(globaldata->getCutOff(), cutOff); convert(globaldata->getIters(), iters); + convert(globaldata->getStep(), step); + form = globaldata->getForm(); matrix = globaldata->gMatrix; coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage"; summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary"; @@ -70,19 +72,28 @@ int LibShuffCommand::execute(){ try { //deltaValues[0] = scores for the difference between AA and AB. //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB... + vector dist; + int next; sumDelta.resize(numComp-numGroups, 0.0); - - float D = 0.0; + float D = 0.0; + /*****************************/ //get values for users matrix /*****************************/ - matrix->getMinsForRowsVectors(); + matrix->setBounds(); + if (form != "discrete") { matrix->getDist(dist); next = 1; } +//cout << "Distances" << endl; +//for (int i = 0; i < dist.size(); i++) { cout << dist[i] << " "; } +//cout << endl; + //get values for users matrix while (D <= cutOff) { - cValues = coverage->getValues(matrix, D); + //clear out old Values + deltaValues.clear(); + coverage->getValues(matrix, D, cValues); //find delta values int count = 0; @@ -98,15 +109,33 @@ int LibShuffCommand::execute(){ } } - D += 0.01; - printCoverageFile(D); - //clear out old Values - cValues.clear(); - deltaValues.clear(); + //check form + if (form != "discrete") { + if (next == dist.size()) { break; } + else { D = dist[next]; next++; } + }else { D += step; } + + } + //output sum Deltas + for (int i = 0; i < numGroups; i++) { + for (int j = 0; j < numGroups; j++) { + //don't output AA to AA + if (i != j) { + cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'; + } + } + } + cout << endl; + + for (int i = 0; i < sumDelta.size(); i++) { + cout << setprecision(6) << sumDelta[i] << '\t'; + } + cout << endl; + /*******************************************************************************/ //create and score random matrixes finding the sumDelta values for summary file /******************************************************************************/ @@ -119,11 +148,16 @@ int LibShuffCommand::execute(){ } } + for (int m = 0; m < iters; m++) { //generate random matrix in getValues //values for random matrix + cout << "Iteration " << m+1 << endl; + D = 0.0; + next = 1; + while (D <= cutOff) { - cValues = coverage->getValues(matrix, D); + coverage->getValues(matrix, D, cValues, "random"); //find delta values int count = 0; @@ -132,17 +166,29 @@ int LibShuffCommand::execute(){ //don't save AA to AA if (i != j) { //(Caa - Cab)^2 - rsumDelta[count][m] += (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])); + rsumDelta[count][m] += ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))); +//cout << "iter " << m << " box " << i << j << " delta = " << ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))) << endl; count++; } } } - - D += 0.01; + + //check form + if (form != "discrete") { + if (next == dist.size()) { break; } + else { D = dist[next]; next++; } + }else { D += step; } + //clear out old Values cValues.clear(); } +cout << "random sum delta for iter " << m << endl; +for (int i = 0; i < rsumDelta.size(); i++) { + cout << setprecision(6) << rsumDelta[i][m] << '\t'; +} +cout << endl; + } /**********************************************************/ @@ -186,7 +232,7 @@ void LibShuffCommand::printCoverageFile(float d) { //format output out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint); - out << setprecision(globaldata->getIters().length()) << d << '\t'; + out << setprecision(6) << d << '\t'; //print out coverage values for (int i = 0; i < numGroups; i++) { @@ -222,18 +268,22 @@ void LibShuffCommand::printSummaryFile() { for (int j = 0; j < numGroups; j++) { //don't output AA to AA if (i != j) { - outSum << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'; + outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'; + cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'; } } } outSum << endl; + cout << endl; //print out delta values for (int i = 0; i < sumDelta.size(); i++) { outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t'; + cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t'; } outSum << endl; + cout << endl; } catch(exception& e) { @@ -283,6 +333,9 @@ void LibShuffCommand::setGroups() { } } + //sort so labels match + sort(globaldata->Groups.begin(), globaldata->Groups.end()); + // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...; for (int i=0; i