]> git.donarmstrong.com Git - mothur.git/commitdiff
working on libshuff
authorwestcott <westcott>
Wed, 18 Mar 2009 16:05:36 +0000 (16:05 +0000)
committerwestcott <westcott>
Wed, 18 Mar 2009 16:05:36 +0000 (16:05 +0000)
12 files changed:
commandfactory.cpp
coverage.cpp
coverage.h
errorchecking.cpp
fullmatrix.cpp
fullmatrix.h
globaldata.cpp
globaldata.hpp
libshuffcommand.cpp
libshuffcommand.h
validcommands.cpp
validparameter.cpp

index a5ec5f12c48059089496af3a39d26b0fedb12f52..19022040cc507e29ec6019442638564ad0ce6048 100644 (file)
@@ -30,6 +30,7 @@
 #include "parsimonycommand.h"
 #include "unifracunweightedcommand.h"
 #include "unifracweightedcommand.h"
+#include "libshuffcommand.h"
 #include "mothur.h"
 
 
@@ -71,9 +72,10 @@ Command* CommandFactory::getCommand(string commandName){
                else if(commandName == "summary.shared")                {       command = new SummarySharedCommand();           }
                else if(commandName == "unifrac.weighted")              {       command = new UnifracWeightedCommand();         }
                else if(commandName == "unifrac.unweighted")    {       command = new UnifracUnweightedCommand();       }
-               else if(commandName == "get.group")             {   command = new GetgroupCommand();        }
-               else if(commandName == "get.label")             {   command = new GetlabelCommand();        }
-               else if(commandName == "get.line")              {   command = new GetlineCommand();         }
+               else if(commandName == "get.group")             {   command = new GetgroupCommand();                    }
+               else if(commandName == "get.label")             {   command = new GetlabelCommand();                    }
+               else if(commandName == "get.line")              {   command = new GetlineCommand();                             }
+               else if(commandName == "libshuff")              {   command = new LibShuffCommand();                    }
                else                                                                                    {       command = new NoCommand();                                      }
 
                return command;
index e7baf001fefe4d5de552c1cd6362eaa2b1333d2c..ba2167a10fbdfd321395446fb1273e67402ca48c 100644 (file)
 #include "coverage.h"
 
 //**********************************************************************************************************************
-vector< vector<float> > Coverage::getValues(FullMatrix*, float) {
-       try {
+Coverage::Coverage() {
                globaldata = GlobalData::getInstance();
-               numGroups = globaldata->Groups.size();
+               numUserGroups = globaldata->Groups.size();
+               numGroups = globaldata->gGroupmap->getNumGroups();
+}
+//**********************************************************************************************************************
+void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data) {
+       try {
+               vector<float> min;
+               vector<string> groups;
                
                //initialize data
-               data.resize(numGroups);
+               data.resize(numUserGroups);
                for (int l = 0; l < data.size(); l++) {
                        data[l].push_back(0.0);
                }
 
                /**************************************/
-               //get the minumums for each comparision
+               //get the minimums for each comparision
                /**************************************/
-               return data;
+               int count = 0;
+               int a = 0;
+               int b = 0;
+               for (int i = 0; i < numGroups; i++) {
+                       for (int j = 0; j < numGroups; j++) {
+                               
+                               //is this "box" one hte user wants analyzed?
+                               if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
+                                       
+                                       min = matrix->getMins(count); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
+                                       
+                                       //find the coverage at this distance
+                                       sort(min.begin(), min.end());
+//cout << "minvector for : " << globaldata->gGroupmap->namesOfGroups[i] + globaldata->gGroupmap->namesOfGroups[j] << endl;
+//for(int h = 0; h<min.size(); h++) {
+// cout << min[h] << " ";
+//}
+//cout << endl;
+                                       int index = -1;
+                                       //find index in min where value is higher than d
+                                       for (int m = 0; m < min.size(); m++) {
+                                               if (min[m] > d) { index = m; break;     }
+                                       }
+                                       
+                                       // if you don't find one than all the mins are less than d
+                                       if (index == -1) { index = min.size();  }
+                                       
+                                       //save value in data
+                                       data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
+//cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;             
+                                       if (b < numUserGroups-1) {  b++;  }
+                                       else{ //you are moving to a new row of "boxes"
+                                               b = 0;
+                                               a++;
+                                       }
+                               }
+                               count++;
+                       }
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the Coverage class function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }       
+}
+//**********************************************************************************************************************
+//For the random matrices
+void Coverage::getValues(FullMatrix* matrix, float d, vector< vector<float> >& data, string r) {
+       try {
+               vector<float> min;
+               vector<string> groups;
+               
+               //initialize data
+               data.resize(numUserGroups);
+               for (int l = 0; l < data.size(); l++) {
+                       data[l].push_back(0.0);
+               }
+
+               /**************************************/
+               //get the minimums for each comparision
+               /**************************************/
+               int count = 0;
+               int a = 0;
+               int b = 0;
+               for (int i = 0; i < numGroups; i++) {
+                       for (int j = 0; j < numGroups; j++) {
+                       
+                               //is this "box" one hte user wants analyzed?
+                               if ((inUsersGroups(globaldata->gGroupmap->namesOfGroups[i], globaldata->Groups) == true) && (inUsersGroups(globaldata->gGroupmap->namesOfGroups[j], globaldata->Groups) == true)) {
+cout << "combo " << a << b << endl;
+cout << "original matrix mins4rows " << endl;
+matrix->printMinsForRows(cout);                                        
+                                       //create random matrix for this comparison
+                                       matrix->shuffle(count);
+                       
+                                       min = matrix->getMins(count);  //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
+cout << "shuffled matrix mins4rows " << endl;
+matrix->printMinsForRows(cout);
+
+                                       //find the coverage at this distance
+                                       sort(min.begin(), min.end());
+                                       
+                                       int index = -1;
+                                       //find index in min where value is higher than d
+                                       for (int m = 0; m < min.size(); m++) {
+                                               if (min[m] > d) { index = m; break;     }
+                                       }
+                                       
+                                       // if you don't find one than all the mins are less than d
+                                       if (index == -1) { index = min.size();  }
+                                       
+                                       //save value in data
+                                       data[a][b] = 1.0 - ((min.size()-index)/(float)min.size());
+cout << "D = " << d << "data " << a << b << " = " << data[a][b] << endl;               
+                                       if (b < numUserGroups-1) {  b++;  }
+                                       else{ //you are moving to a new row of "boxes"
+                                               b = 0;
+                                               a++;
+                                       }
+                               }
+                               count++;
+                               
+                               //restore matrix to original form for next shuffle
+                               matrix->restore();
+min = matrix->getMins(count-1); 
+cout << "restored matrix mins4rows " << endl;
+matrix->printMinsForRows(cout);
+                       }
+               }
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the Coverage class Function getValues. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
index c556bb206c821d5b8c3245ca09fc0ff1b4a8bc21..7e6430019551b7db843ba178815eb5fbfeef5ae8 100644 (file)
@@ -21,14 +21,14 @@ using namespace std;
 class Coverage  {
        
        public: 
-               Coverage(){};
+               Coverage();
                ~Coverage(){};
-               vector< vector<float> > getValues(FullMatrix*, float);  
+               void getValues(FullMatrix*, float, vector< vector<float> >&);
+               void getValues(FullMatrix*, float, vector< vector<float> >&, string);   //for random matrices
                
        private:
                GlobalData* globaldata;
-               vector< vector<float> > data;
-               int numGroups;
+               int numGroups, numUserGroups;
 
 };
 
index 430729d026ec5082416d2cdc118413dee290e48d..397bbd2315fff983687a1723cca46ab7808bf8b3 100644 (file)
@@ -355,7 +355,9 @@ bool ErrorCheck::checkInput(string input) {
                        }else if (listfile != "") { //you want to do single commands
                                validateReadFiles();
                                validateReadPhil();
-                       }else {//you are reading a shared file
+                       }else if ((listfile == "") && (sharedfile == "")) {
+                               cout << "You must enter either a listfile or a sharedfile with the read.otu command. " << endl; return false; 
+                       }else{//you are reading a shared file
                                validateReadFiles();
                        }
                }else if (commandName == "read.tree") { 
@@ -372,6 +374,10 @@ bool ErrorCheck::checkInput(string input) {
                                errorFree = false;
                } 
                
+               if ((commandName == "libshuff") && (globaldata->gMatrix == NULL)) {
+                        cout << "You must read in a matrix before you use the libshuff command. " << endl; return false; 
+               }
+               
                if (commandName == "parsimony") {
                        //are you trying to use parsimony without reading a tree or saying you want random distribution
                        if (randomtree == "")  {
@@ -429,7 +435,7 @@ void ErrorCheck::validateReadFiles() {
                        //unable to open
                        if (ableToOpen == 1) { errorFree = false; }
                        else { globaldata->inputFileName = phylipfile; }
-               //are we reading a phylipfile
+               //are we reading a columnfile
                }else if (columnfile != "") {
                        ableToOpen = openInputFile(columnfile, filehandle);
                        filehandle.close();
@@ -600,6 +606,13 @@ void ErrorCheck::validateReadDist() {
                ifstream filehandle;
                int ableToOpen;
                
+               if (groupfile != "") {
+                       ableToOpen = openInputFile(groupfile, filehandle);
+                       filehandle.close();
+                       //unable to open
+                       if (ableToOpen == 1) {  errorFree = false; }
+               }
+               
                if ((phylipfile == "") && (columnfile == "")) { cout << "When executing a read.dist you must enter a phylip or a column." << endl; errorFree = false; }
                else if ((phylipfile != "") && (columnfile != "")) { cout << "When executing a read.dist you must enter ONLY ONE of the following: phylip or column." << endl; errorFree = false; }
                
index 6151a65202901fa24ec6ff91166017a682483c62..6c09639fcbb4c48497ba2b17a61851ba2435c6af 100644 (file)
@@ -78,7 +78,6 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) {
                reading = new Progress("Reading matrix:    ", numSeqs * numSeqs);
                
                int count = 0;
-               float distance;
                
                string group, name;
                
@@ -92,9 +91,8 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) {
                        if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
                                
                        for(int j=0;j<numSeqs;j++){
-                               filehandle >> distance;
-                                       
-                               matrix[i][j] = distance;
+                               filehandle >> matrix[i][j];
+                               
                                count++;
                                reading->update(count);
                        }
@@ -161,7 +159,7 @@ void FullMatrix::sortGroups(int low, int high){
        
                int i = low;
                int j = high;
-               int y = 0;
+               float y = 0;
                string name;
                
                /* compare value */
@@ -235,7 +233,7 @@ void FullMatrix::printMatrix(ostream& out) {
                for (int i = 0; i < numSeqs; i++) {
                        out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
                        for (int j = 0; j < numSeqs; j++) {
-                               out << matrix[i][j] << " ";
+                               //out << matrix[i][j] << " ";
                        }
                        out << endl;
                }
@@ -252,57 +250,105 @@ void FullMatrix::printMatrix(ostream& out) {
 }
 
 /**************************************************************************/
-void FullMatrix::getMinsForRowsVectors(){
+void FullMatrix::setBounds(){
        try{
                numGroups = globaldata->gGroupmap->namesOfGroups.size();
                
                //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
                sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
                
+               //one for each comparision
+               //minsForRows.resize(numGroups*numGroups);
+               
                /*************************************************/
                //find where in matrix each group starts and stops
                /*************************************************/
-               vector<int> bounds;  //bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find upper bound of C because its numSeqs.
                bounds.resize(numGroups);
                
                bounds[0] = 0;
-               bounds[numGroups] = numSeqs-1;
+               bounds[numGroups] = numSeqs;
+
                //for each group find bounds of subgroup/comparison
                for (int i = 1; i < numGroups; i++) {
-                       getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i]);
+                       getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
                }
                
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+/**************************************************************************/
+vector<float> FullMatrix::getMins(int x) {
+       try{    
+               //clear out old data
+               minsForRows.clear();
+               
                /************************************************************/
-               //fill the minsForRows vectors for each group the user wants
+               //fill the minsForRows vector for the box the user wants
                /************************************************************/
-               int countx = bounds[1]; //where second group starts
-               int county = bounds[1]; 
+               int count = 0;
+               int lowBoundx = bounds[0]; //where first group starts
+               int lowBoundy = bounds[0]; 
+               int highBoundx = bounds[1]; //where second group starts
+               int highBoundy = bounds[1]; 
                
-               //go through the entire matrix
-               for (int x = 0; x < numSeqs; x++) {
-                       for (int y = 0; y < numSeqs; y++) {
-                               //if have not changed groups
-                               if ((x < countx) && (y < county)) {
-                                       
-                               }
+               int countx = 1;  //index in bound
+               int county = 1; //index in bound
+               
+               //find the bounds for the box the user wants
+               for (int i = 0; i < (numGroups * numGroups); i++) {
+               
+                       //are you at the box?
+                       if (count == x) { break; }
+                       else { count++; }
+                       
+                       //move to next box
+                       if (county < numGroups) {
+                               county++;
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
+                       }else{ //you are moving to a new row of "boxes"
+                               county = 1;
+                               countx++;
+                               highBoundx = bounds[countx];
+                               lowBoundx = bounds[countx-1];
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
                        }
                }
-                                       
                                
+               //each row in the box
+               for (int x = lowBoundx; x < highBoundx; x++) {
+                       float min4Row = 100000.0;
+                       //each entry in that row
+                       for (int y = lowBoundy; y < highBoundy; y++) {
+                               //if you are not on the diagonal and you are less than previous minimum
+                               if ((x != y) && (matrix[x][y] < min4Row)) {
+                                       min4Row = matrix[x][y];
+                               }
+                       }
+                       //save minimum value for that row in minsForRows vector of vectors
+                       minsForRows.push_back(min4Row);
+               }
                        
-       
+               return minsForRows;
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
        catch(...) {
-               cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
-
 }
-
 /**************************************************************************/
 void FullMatrix::getBounds(int& higher, string group) {
        try{
@@ -311,7 +357,7 @@ void FullMatrix::getBounds(int& higher, string group) {
                //for each group find bounds of subgroup/comparison
                for (it = index.begin(); it != index.end(); it++) {
                        if (it->second.groupname == group) {
-                               if (gotLower != true) { gotLower = true; }
+                               gotLower = true; 
                        }else if ((gotLower == true) && (it->second.groupname != group)) {  higher = it->first; break; }
                }
        
@@ -327,3 +373,215 @@ void FullMatrix::getBounds(int& higher, string group) {
 
 }
 
+/**************************************************************************/
+//print out matrix
+void FullMatrix::printMinsForRows(ostream& out) {
+       try{
+               for (int j = 0; j < minsForRows.size(); j++) {
+                       out << minsForRows[j] << " ";
+               }
+               out << endl;
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+/**************************************************************************/
+//shuffles the sequences in the 2 groups passed in.
+void FullMatrix::shuffle(int box){
+       try{
+               vector<int> rows2Swap;
+               vector<int> shuffled;
+               float y = 0;
+               string name = "";
+               
+               /****************************/
+               //find the box the user wants
+               /****************************/
+               int count = 0;
+               int lowBoundy = bounds[0]; //where first group starts
+               int highBoundy = bounds[1]; //where second group starts
+               int county = 1; //index in bound
+               
+               //find the bounds for the box the user wants
+               for (int i = 0; i < (numGroups * numGroups); i++) {
+               
+                       //are you at the box?
+                       if (count == box) { break; }
+                       else { count++; }
+                       
+                       //move to next box
+                       if (county < numGroups) {
+                               county++;
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
+                       }else{ //you are moving to a new row of "boxes"
+                               county = 1;
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
+                       }
+               }
+       
+               /************************/
+               //save its rows locations
+               /************************/
+               //go through the matrix map to find the rows from groups you want to randomize
+               for (int y = lowBoundy; y < highBoundy; y++) {
+                       rows2Swap.push_back(y);
+                       shuffled.push_back(y);
+               }
+               
+               //randomize rows to shuffle in shuffled
+               random_shuffle(shuffled.begin(), shuffled.end());
+               
+               /***************************************/
+               //swap rows and columns to randomize box
+               /***************************************/
+               for (int i = 0; i < shuffled.size(); i++) {
+                       //record the swaps you are making so you can undo them in restore function
+                       restoreIndex[i].a = shuffled[i];
+                       restoreIndex[i].b = rows2Swap[i];
+                       
+                       /* swap rows*/
+                       for (int h = 0; h < numSeqs; h++) {
+                               y = matrix[shuffled[i]][h];
+                               matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h]; 
+                               matrix[rows2Swap[i]][h] = y;
+                       }
+                               
+                       /* swap columns */
+                       for (int b = 0; b < numSeqs; b++) {
+                               y = matrix[b][shuffled[i]];
+                               matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]]; 
+                               matrix[b][rows2Swap[i]] = y;
+                       }
+                               
+                       //swap map elements
+                       name = index[shuffled[i]].seqName;
+                       index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
+                       index[rows2Swap[i]].seqName = name;
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+} 
+/**************************************************************************/
+//unshuffles the matrix.
+void FullMatrix::restore(){
+       try{
+               float y = 0;
+               string name = "";
+
+               //reverse iterate through swaps and undo them to restore original matrix and index map.
+               for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
+                       /* swap rows */
+                       for (int h = 0; h < numSeqs; h++) {
+                               y = matrix[it2->second.a][h];
+                               matrix[it2->second.a][h] = matrix[it2->second.b][h]; 
+                               matrix[it2->second.b][h] = y;
+                       }
+                       
+                       /* swap columns */
+                       for (int b = 0; b < numSeqs; b++) {
+                               y = matrix[b][it2->second.a];
+                               matrix[b][it2->second.a] = matrix[b][it2->second.b]; 
+                               matrix[b][it2->second.b] = y;
+                       }
+                       
+                               
+                       //swap map elements
+                       name = index[it2->second.a].seqName;
+                       index[it2->second.a].seqName = index[it2->second.b].seqName;
+                       index[it2->second.b].seqName = name;
+               }
+
+               //clear restore for next shuffle
+               restoreIndex.clear();
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}  
+/**************************************************************************/
+void FullMatrix::getDist(vector<float>& distances) {
+       try{
+               map<float, float> dist;  //holds the distances for the integral form
+               map<float, float>::iterator it;
+
+               /************************************************************/
+               //fill the minsForRows vectors for each group the user wants
+               /************************************************************/
+               int lowBoundx = bounds[0]; //where first group starts
+               int lowBoundy = bounds[0]; 
+               int highBoundx = bounds[1]; //where second group starts
+               int highBoundy = bounds[1]; 
+               
+               int countx = 1;  //index in bound
+               int county = 1; //index in bound
+               
+               //go through each "box" in the matrix
+               for (int i = 0; i < (numGroups * numGroups); i++) {
+                       //each row in the box
+                       for (int x = lowBoundx; x < highBoundx; x++) {
+                               float min4Row = 100000.0;
+                               //each entry in that row
+                               for (int y = lowBoundy; y < highBoundy; y++) {
+                                       //if you are not on the diagonal and you are less than previous minimum
+                                       if ((x != y) && (matrix[x][y] < min4Row)){
+                                               min4Row = matrix[x][y];
+                                       }
+                               }
+                               //save minimum value 
+                               dist[min4Row] = min4Row;
+                       }
+                       
+                       //****** reset bounds to process next "box" ********
+                       //if you still have more "boxes" in that row
+                       if (county < numGroups) {
+                               county++;
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
+                       }else{ //you are moving to a new row of "boxes"
+                               county = 1;
+                               countx++;
+                               highBoundx = bounds[countx];
+                               lowBoundx = bounds[countx-1];
+                               highBoundy = bounds[county];
+                               lowBoundy = bounds[county-1];
+                       }
+               }
+
+               //store distances in users vector
+               for (it = dist.begin(); it != dist.end(); it++) {
+                       distances.push_back(it->first);
+               }
+               
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+
index 43bde963ca3396241a26771771189cb27c2df617..b744490d760d108df3ef7780cb35bcae918b9dd2 100644 (file)
@@ -21,6 +21,11 @@ struct Names {
        string          seqName;
 };
 
+struct Swap {
+       int             a;
+       int             b;
+};
+
 
 class FullMatrix {
        
@@ -31,17 +36,29 @@ class FullMatrix {
        
                int getNumSeqs();
                void printMatrix(ostream&);
-               void getMinsForRowsVectors();  //requires globaldata->Groups to be filled
-       
+               void setBounds();  //requires globaldata->Groups to be filled
+               vector<float> getMins(int); //returns vector of mins for "box" requested ie. groups A, B, 0 = AA, 1 = AB, 2 = BA, 3 = BB;
+               void getDist(vector<float>&);  //fills a vector with the valid distances for the integral form.
+               void shuffle(int);  //shuffles the sequences in the box passed in.
+               void restore();  //unshuffles the matrix.
+                               
+               
+       void printMinsForRows(ostream&);
        private:
                void sortGroups(int, int);  //this function sorts the sequences within the matrix.
                void getBounds(int&, string);
                void readSquareMatrix(ifstream&);  
                void readLTMatrix(ifstream&);
-               vector< vector<float> > matrix;  //a 2D distance matrix of all the sequences and their distances to eachother.
-               vector< vector<float> > minsForRows;  //vector< minimum distance for that subrow> -one for each comparison.
+               
                map<int, Names> index; // row in vector, sequence group.  need to know this so when we sort it can be updated.
+               map<int, Swap> restoreIndex; //a map of the swaps made so you can undo them in restore.
                map<int, Names>::iterator it;
+               map<int, Swap>::reverse_iterator it2;
+                       
+               vector< vector<float> > matrix;  //a 2D distance matrix of all the sequences and their distances to eachother.
+               vector<float> minsForRows;  //vector< minimum distance for that subrow> - one for each comparison.
+               vector<int> bounds;  //bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find upper bound of C because its numSeqs.
+                                                               
                GroupMap* groupmap;  //maps sequences to groups they belong to.
                GlobalData* globaldata;
                int numSeqs, numGroups, numUserGroups;
index 17b4908515179211b792a6d06801d2dc878a7bca..a0ccdcb690eda34efb4fe9964b375b47b0e71d2f 100644 (file)
@@ -39,6 +39,11 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        helpRequest = optionText;
                }
                
+               if (commandName == "libshuff") {
+                       iters = "10000";
+                       cutoff = "1.0";
+               }
+               
                string key, value;              
                //reads in parameters and values
                if((optionText != "") && (commandName != "help")){
@@ -66,7 +71,10 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                                if (key == "fileroot" )         { fileroot = value;             }
                                if (key == "abund" )        { abund = value;        }
                                if (key == "random" )           { randomtree = value;   }
-                               if (key == "calc")                      { calc = value;         }
+                               if (key == "calc")                      { calc = value;                 }
+                               if (key == "step")                      { step = value;                 }
+                               if (key == "form")                      { form = value;                 }
+                               
 
                                
                                if (key == "line") {//stores lines to be used in a set
@@ -116,7 +124,8 @@ void GlobalData::parseGlobalData(string commandString, string optionText){
                        if (key == "abund" )        { abund = value;        }
                        if (key == "random" )           { randomtree = value;   }
                        if (key == "calc")                      { calc = value;         }
-
+                       if (key == "step")                      { step = value;                 }
+                       if (key == "form")                      { form = value;                 }
 
                        if (key == "line") {//stores lines to be used in a vector
                                lines.clear();
@@ -218,6 +227,8 @@ string GlobalData::getFreq()                        {       return freq;            }
 string GlobalData::getAbund()           {   return abund;       }
 string GlobalData::getRandomTree()             {       return randomtree;      }
 string GlobalData::getGroups()                 {       return groups;          }
+string GlobalData::getStep()                   {       return step;            }
+string GlobalData::getForm()                   {       return form;            }
 void GlobalData::setListFile(string file)      {       listfile = file;        inputFileName = file;}
 void GlobalData::setRabundFile(string file)    {       rabundfile = file;      inputFileName = file;}
 void GlobalData::setSabundFile(string file)    {       sabundfile = file;      inputFileName = file;}
@@ -266,6 +277,8 @@ void GlobalData::clear() {
        method                  =       "furthest";
        fileroot                =       "";
        abund           =   "10";
+       step                    =       "0.01";
+       form                    =       "integral";
 }
 
 //*******************************************************/
@@ -281,7 +294,9 @@ void GlobalData::reset() {
        freq                    =       "100";
        method                  =       "furthest";
        calc                    =       "";
-       abund = "10";
+       abund                   =   "10";
+       step                    =       "0.01";
+       form                    =       "integral";
 }
 /*******************************************************/
 
index 6070dd7b2cdd6122b78c5833359ce487497fd0e3..16bc8d32b6bc75c2b4cbabfdb4ef6dfb78890d4c 100644 (file)
@@ -60,6 +60,8 @@ public:
        string getAbund();
        string getRandomTree();
        string getGroups();
+       string getStep();
+       string getForm();
 
        void setListFile(string);
        void setPhylipFile(string);
@@ -80,7 +82,7 @@ public:
                
 private:
        string phylipfile, columnfile, listfile, rabundfile, sabundfile, namefile, groupfile, orderfile, fastafile, treefile, sharedfile, line, label, randomtree, groups;
-       string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund;
+       string cutoff, format, precision, method, fileroot, iters, jumble, freq, calc, abund, step, form;
 
        static GlobalData* _uniqueInstance;
        GlobalData( const GlobalData& ); // Disable copy constructor
index a34fb3602c2dc5b3b6ce6b9ec01ef2ed6f35eab3..bd0166213bcf26dde642856b4004031d3480110a 100644 (file)
@@ -17,6 +17,8 @@ LibShuffCommand::LibShuffCommand(){
                globaldata = GlobalData::getInstance();
                convert(globaldata->getCutOff(), cutOff);
                convert(globaldata->getIters(), iters);
+               convert(globaldata->getStep(), step);
+               form = globaldata->getForm();
                matrix = globaldata->gMatrix;
                coverageFile = getRootName(globaldata->getPhylipFile()) + "coverage";
                summaryFile = getRootName(globaldata->getPhylipFile()) + "slsummary";
@@ -70,19 +72,28 @@ int LibShuffCommand::execute(){
        try {
                //deltaValues[0] = scores for the difference between AA and AB.
                //cValues[0][0] = AA, cValues[0][1] = AB, cValues[0][2] = AC, cValues[1][0] = BA, cValues[1][1] = BB...
+               vector<float> dist;
+               int next;
                
                sumDelta.resize(numComp-numGroups, 0.0);
-                               
-               float D = 0.0;
                
+               float D = 0.0;
+       
                /*****************************/
                //get values for users matrix
                /*****************************/
-               matrix->getMinsForRowsVectors();
+               matrix->setBounds();
                
+               if (form != "discrete") { matrix->getDist(dist); next = 1; }
+//cout << "Distances" << endl;
+//for (int i = 0; i < dist.size(); i++) { cout << dist[i] << " "; }    
+//cout << endl;
+       
                //get values for users matrix
                while (D <= cutOff) {
-                       cValues = coverage->getValues(matrix, D);
+                       //clear out old Values
+                       deltaValues.clear();                    
+                       coverage->getValues(matrix, D, cValues);
                        
                        //find delta values
                        int count = 0;
@@ -98,15 +109,33 @@ int LibShuffCommand::execute(){
                                }
                        }
                        
-                       D += 0.01;
-                       
                        printCoverageFile(D);
                        
-                       //clear out old Values
-                       cValues.clear();
-                       deltaValues.clear();
+                       //check form
+                       if (form != "discrete") {   
+                               if (next == dist.size()) { break; }
+                               else {  D = dist[next];  next++;        }
+                       }else {  D += step;  }
+                       
+
                }
                
+               //output sum Deltas
+               for (int i = 0; i < numGroups; i++) {
+                       for (int j = 0; j < numGroups; j++) {
+                               //don't output AA to AA
+                               if (i != j) {
+                                       cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+                               }
+                       }
+               }
+               cout << endl;
+               
+               for (int i = 0; i < sumDelta.size(); i++) {
+                       cout << setprecision(6) << sumDelta[i] << '\t';
+               }
+               cout << endl;
+                               
                /*******************************************************************************/
                //create and score random matrixes finding the sumDelta values for summary file
                /******************************************************************************/
@@ -119,11 +148,16 @@ int LibShuffCommand::execute(){
                        }
                }
                
+               
                for (int m = 0; m < iters; m++) {
                        //generate random matrix in getValues
                        //values for random matrix
+                       cout << "Iteration " << m+1 << endl;
+                       D = 0.0;
+                       next = 1;
+                       
                        while (D <= cutOff) {
-                               cValues = coverage->getValues(matrix, D);
+                               coverage->getValues(matrix, D, cValues, "random");
                        
                                //find delta values
                                int count = 0;
@@ -132,17 +166,29 @@ int LibShuffCommand::execute(){
                                                //don't save AA to AA
                                                if (i != j) {
                                                        //(Caa - Cab)^2
-                                                       rsumDelta[count][m] += (abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]));
+                                                       rsumDelta[count][m] += ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j])));
+//cout << "iter " << m << " box " << i << j << " delta = " << ((abs(cValues[i][i]-cValues[i][j]) * abs(cValues[i][i]-cValues[i][j]))) << endl;
                                                        count++;
                                                }
                                        }
                                }
-                       
-                               D += 0.01;
+
+                               //check form
+                               if (form != "discrete") {   
+                                       if (next == dist.size()) { break; }
+                                       else {  D = dist[next];  next++;        }
+                               }else {  D += step;  }
+
                        
                                //clear out old Values
                                cValues.clear();
                        }
+cout << "random sum delta for iter " << m << endl;
+for (int i = 0; i < rsumDelta.size(); i++) {
+       cout << setprecision(6) << rsumDelta[i][m] << '\t';
+}
+cout << endl;
+
                }
                
                /**********************************************************/
@@ -186,7 +232,7 @@ void LibShuffCommand::printCoverageFile(float d) {
                //format output
                out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
                
-               out << setprecision(globaldata->getIters().length()) << d << '\t';
+               out << setprecision(6) << d << '\t';
                
                //print out coverage values
                for (int i = 0; i < numGroups; i++) {
@@ -222,18 +268,22 @@ void LibShuffCommand::printSummaryFile() {
                        for (int j = 0; j < numGroups; j++) {
                                //don't output AA to AA
                                if (i != j) {
-                                       outSum << "Delta" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig" + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+                                       outSum << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
+                                       cout << "Delta " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t'<< "DeltaSig " + globaldata->Groups[i] + "-" + globaldata->Groups[j] << '\t';
                                }
                        }
                }
                outSum << endl;
+               cout << endl;
                
                //print out delta values
                for (int i = 0; i < sumDelta.size(); i++) {
                        outSum << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
+                       cout << setprecision(6) << sumDelta[i] << '\t' << setprecision(globaldata->getIters().length()) << sumDeltaSig[i] << '\t';
                }
                
                outSum << endl;
+               cout << endl;
        
        }
        catch(exception& e) {
@@ -283,6 +333,9 @@ void LibShuffCommand::setGroups() {
                        }
                }
                
+               //sort so labels match
+               sort(globaldata->Groups.begin(), globaldata->Groups.end());
+               
                // number of comparisons i.e. with groups A,B,C = AA, AB, AC, BA, BB, BC...;
                for (int i=0; i<numGroups; i++) { 
                        for (int l = 0; l < numGroups; l++) {
index b2474c52e4ea1188c042550033f6e27581a6f4a3..fdaa7eee6525ab15ff45407df12544a42e0ae949 100644 (file)
@@ -42,9 +42,9 @@ class LibShuffCommand : public Command {
                GlobalData* globaldata;
                Coverage* coverage;
                FullMatrix* matrix;
-               float cutOff;
+               float cutOff, step;
                int numGroups, numComp, iters;
-               string coverageFile, summaryFile;
+               string coverageFile, summaryFile, form;
                ofstream out, outSum;
                                
                
index 1ccb8fca51577674af1ecb2fa5fe895655330654..b1fac5586b6001812f8a5758204296fe2b7193d2 100644 (file)
@@ -33,6 +33,7 @@ ValidCommands::ValidCommands() {
                commands["summary.shared"]              = "summary.shared"; 
                commands["unifrac.weighted"]    = "unifrac.weighted"; 
                commands["unifrac.unweighted"]  = "unifrac.unweighted"; 
+               commands["libshuff"]                    = "libshuff";
 
                                
        }
index 3a96cc64893e4d0b5b8b755202dda24316ba1a08..8d7c18e0bbc00131b2c6e9a6e80a264c251aaa97 100644 (file)
@@ -39,6 +39,8 @@ ValidParameters::ValidParameters() {
                parameters["random"]                    = "random";
                parameters["groups"]                    = "groups";
                parameters["calc"]                              = "calc";
+               parameters["step"]                              = "step";
+               parameters["form"]                              = "form";
 
        }
        catch(exception& e) {