]> git.donarmstrong.com Git - mothur.git/blobdiff - fullmatrix.cpp
added sharedbraycurtis, sharedmorisirahorn, sharedkulczynski, sharedkulczynskicody...
[mothur.git] / fullmatrix.cpp
index c61109c78ce2cdcca001455a0408be34692c47d1..f4acd78c1132022b9bc22a6c878febe5a4df8c1c 100644 (file)
@@ -27,7 +27,8 @@ FullMatrix::FullMatrix(ifstream& filehandle) {
                
                group = groupmap->getGroup(name);
                if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
-               index[0] = group; 
+               index[0].groupname = group; 
+               index[0].seqName = name;
                
                //determine if matrix is square or lower triangle
                //if it is square read the distances for the first sequence
@@ -38,6 +39,7 @@ FullMatrix::FullMatrix(ifstream& filehandle) {
                        if(isalnum(d)){ 
                                square = true;
                                filehandle.putback(d);
+                               
                                for(int i=0;i<numSeqs;i++){
                                        filehandle >> matrix[0][i];
                                }
@@ -55,14 +57,9 @@ FullMatrix::FullMatrix(ifstream& filehandle) {
                if (square == true) { readSquareMatrix(filehandle); }
                else { readLTMatrix(filehandle); }
                
-               
-               
-       printMatrix(cout);
                //sort sequences so they are gathered in groups for processing
-               sortGroups();
-               cout << "after sort" << endl;
-       printMatrix(cout);
-               
+               sortGroups(0, numSeqs-1);
+                       
        }
        catch(exception& e) {
                cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
@@ -78,24 +75,24 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) {
        try {
        
                Progress* reading;
-               reading = new Progress("Reading matrix:    ", numSeqs * numSeqs);
+               reading = new Progress("Reading matrix:     ", numSeqs * numSeqs);
                
                int count = 0;
-               float distance;
+               
                string group, name;
                
                for(int i=1;i<numSeqs;i++){
                        filehandle >> name;             
                        
                        group = groupmap->getGroup(name);
-                       index[i] = group;
+                       index[i].groupname = group;
+                       index[i].seqName = name;
                        
                        if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
                                
                        for(int j=0;j<numSeqs;j++){
-                               filehandle >> distance;
-                                       
-                               matrix[i][j] = distance;
+                               filehandle >> matrix[i][j];
+                               
                                count++;
                                reading->update(count);
                        }
@@ -117,17 +114,19 @@ void FullMatrix::readSquareMatrix(ifstream& filehandle) {
 void FullMatrix::readLTMatrix(ifstream& filehandle) {
        try {
                Progress* reading;
-               reading = new Progress("Reading matrix:    ", numSeqs * (numSeqs - 1) / 2);
+               reading = new Progress("Reading matrix:     ", numSeqs * (numSeqs - 1) / 2);
                
                int count = 0;
                float distance;
+
                string group, name;
                
                for(int i=1;i<numSeqs;i++){
                        filehandle >> name;             
                                                
                        group = groupmap->getGroup(name);
-                       index[i] = group;
+                       index[i].groupname = group;
+                       index[i].seqName = name;
        
                        if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl;  exit(1); }
                                
@@ -138,6 +137,7 @@ void FullMatrix::readLTMatrix(ifstream& filehandle) {
                                count++;
                                reading->update(count);
                        }
+                       
                }
                reading->finish();
                delete reading;
@@ -154,82 +154,74 @@ void FullMatrix::readLTMatrix(ifstream& filehandle) {
 }
 
 /**************************************************************************/
-void FullMatrix::sortGroups(){
+void FullMatrix::sortGroups(int low, int high){
        try{
-               //sort each row by group and when you do, swap rows too.
-               for (int i = 0; i < numSeqs; i++) {
-                       quicksort(0, numSeqs-1, i);
-               }
-       }
-       catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-       catch(...) {
-               cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
-               exit(1);
-       }
-
-}
-/**************************************************************************/
-//this is a version of quicksort taken from http://www.c.happycodings.com/Sorting_Searching/code13.html
-/* sort everything inbetween `low' <-> `high' */
-void FullMatrix::quicksort(int low, int high, int row) {
-       try {
+       
                int i = low;
                int j = high;
-               int y = 0;
+               float y = 0;
+               string name;
                
                /* compare value */
                //what group does this row belong to
-               string z = index[(low + high) / 2];
+               string z = index[(low + high) / 2].groupname;
 
                /* partition */
                do {
                        /* find member above ... */
-                       while(index[i] < z) i++;
+                       while(index[i].groupname < z) i++;
 
                        /* find element below ... */
-                       while(index[j] > z) j--;
+                       while(index[j].groupname > z) j--;
                        
                        if(i <= j) {
-                               /* swap two elements in row*/
-                               y = matrix[row][i];
-                               matrix[row][i] = matrix[row][j]; 
-                               matrix[row][j] = y;
+                               /* swap rows*/
+                               for (int h = 0; h < numSeqs; h++) {
+                                       y = matrix[i][h];
+                                       matrix[i][h] = matrix[j][h]; 
+                                       matrix[j][h] = y;
+                               }
                                
-                               /* swap two elements in column*/
-                               y = matrix[i][row];
-                               matrix[i][row] = matrix[j][row]; 
-                               matrix[j][row] = y;
+                               /* swap columns*/
+                               for (int b = 0; b < numSeqs; b++) {
+                                       y = matrix[b][i];
+                                       matrix[b][i] = matrix[b][j]; 
+                                       matrix[b][j] = y;
+                               }
                                
                                //swap map elements
-                               z = index[i];
-                               index[i] = index[j];
-                               index[j] = z;
+                               z = index[i].groupname;
+                               index[i].groupname = index[j].groupname;
+                               index[j].groupname = z;
+                               
+                               name = index[i].seqName;
+                               index[i].seqName = index[j].seqName;
+                               index[j].seqName = name;
+
                                
                                i++; 
                                j--;
-//cout << "swapping elements " << i << " " << j << endl;
-//printMatrix(cout); cout << endl;
                        }
                } while(i <= j);
 
                /* recurse */
                if(low < j) 
-               quicksort(low, j, row);
+               sortGroups(low, j);
 
                if(i < high) 
-               quicksort(i, high, row); 
+               sortGroups(i, high); 
+
+       
        }
        catch(exception& e) {
-               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function quicksort. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
        catch(...) {
-               cout << "An unknown error has occurred in the FullMatrix class function quicksort. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
                exit(1);
        }
+
 }
 
 /**************************************************************************/   
@@ -239,7 +231,7 @@ int FullMatrix::getNumSeqs(){ return numSeqs; }
 void FullMatrix::printMatrix(ostream& out) {
        try{
                for (int i = 0; i < numSeqs; i++) {
-                       out << "row " << i << " group = " << index[i] << endl;
+                       out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
                        for (int j = 0; j < numSeqs; j++) {
                                out << matrix[i][j] << " ";
                        }
@@ -256,5 +248,322 @@ void FullMatrix::printMatrix(ostream& out) {
        }
 
 }
+
 /**************************************************************************/
+//Records, in bounds, the matrix row where each group's block of sequences starts.
+//The constructor sorts the matrix rows by group name, so the group names are sorted
+//here the same way before the boundaries are looked up.
+//bounds[0] is 0 and bounds[numGroups] is numSeqs (one past the last row).
+void FullMatrix::setBounds(){
+       try{
+               numGroups = globaldata->gGroupmap->namesOfGroups.size();
+               
+               //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
+               sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
+               
+               /*************************************************/
+               //find where in matrix each group starts and stops
+               /*************************************************/
+               //one slot per group start plus one closing slot, so index numGroups is valid
+               bounds.resize(numGroups + 1);
+               
+               bounds[0] = 0;
+               bounds[numGroups] = numSeqs;
+
+               //the end of group i-1 is the start of group i
+               for (int i = 1; i < numGroups; i++) {
+                       getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
+               }
+               
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function setBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function setBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+/**************************************************************************/
+//Returns the row minimums for one "box" of the matrix.
+//The matrix is treated as a numGroups x numGroups grid of boxes delimited by bounds,
+//numbered row by row starting at 0; x selects the box.
+//For each matrix row inside the box, the smallest off-diagonal value (or 100000.0 if
+//there is none below that) is stored in minsForRows, which is also returned.
+//An x outside the grid yields an empty result.
+vector<float> FullMatrix::getMins(int x) {
+       try{    
+               //clear out old data
+               minsForRows.clear();
+               
+               //an index outside the grid has no box; stepping to it would read past the end of bounds
+               if ((numGroups <= 0) || (x < 0) || (x >= (numGroups * numGroups))) { return minsForRows; }
+               
+               /************************************************************/
+               //find the bounds for the box the user wants
+               /************************************************************/
+               int boxRow = x / numGroups;  //which row of "boxes"
+               int boxCol = x % numGroups;  //which box within that row
+               
+               int lowBoundx = bounds[boxRow];
+               int highBoundx = bounds[boxRow+1];
+               int lowBoundy = bounds[boxCol];
+               int highBoundy = bounds[boxCol+1];
+                               
+               /************************************************************/
+               //fill the minsForRows vector for the box the user wants
+               /************************************************************/
+               //each row in the box
+               for (int row = lowBoundx; row < highBoundx; row++) {
+                       float min4Row = 100000.0;
+                       //each entry in that row
+                       for (int col = lowBoundy; col < highBoundy; col++) {
+                               //if you are not on the diagonal and you are less than previous minimum
+                               if ((row != col) && (matrix[row][col] < min4Row)) {
+                                       min4Row = matrix[row][col];
+                               }
+                       }
+                       //save minimum value for that row
+                       minsForRows.push_back(min4Row);
+               }
+                       
+               return minsForRows;
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}
+/**************************************************************************/
+//Walks index in key order and writes into higher the key of the first entry that
+//follows a run of entries whose groupname equals group.
+//higher is left untouched when no entry matches or when the run reaches the end of index.
+void FullMatrix::getBounds(int& higher, string group) {
+       try{
+               bool insideGroup = false;
+               
+               it = index.begin();
+               while (it != index.end()) {
+                       if (it->second.groupname == group) {
+                               //still inside (or just entered) the requested group
+                               insideGroup = true;
+                       }else if (insideGroup) {
+                               //first entry past the group marks its upper bound
+                               higher = it->first;
+                               break;
+                       }
+                       it++;
+               }
+       
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+
+/**************************************************************************/
+//Prints the values currently held in minsForRows on one line, separated by spaces,
+//followed by a newline.
+void FullMatrix::printMinsForRows(ostream& out) {
+       try{
+               //unsigned index so the comparison with size() does not mix signedness
+               for (size_t j = 0; j < minsForRows.size(); j++) {
+                       out << minsForRows[j] << " ";
+               }
+               out << endl;
+
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMinsForRows. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function printMinsForRows. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+
+}
+
+/**************************************************************************/
+//Randomly permutes the sequences that belong to groupA or groupB.
+//Each swap exchanges two matrix rows, the matching two columns, and the two seqName
+//entries in index (group names stay where they are). Every swap is recorded in
+//restoreIndex so that restore() can undo them.
+void FullMatrix::shuffle(string groupA, string groupB){
+       try{
+               /********************************/
+               //collect the rows to randomize
+               /********************************/
+               vector<int> rows2Swap;
+               for (it = index.begin(); it != index.end(); it++) {
+                       const bool inGroupA = (it->second.groupname == groupA);
+                       if (inGroupA || (it->second.groupname == groupB)) {
+                               rows2Swap.push_back(it->first);
+                       }
+               }
+               
+               //a second copy of the same rows, put into random order
+               vector<int> shuffled(rows2Swap);
+               random_shuffle(shuffled.begin(), shuffled.end());
+               
+               /***************************************/
+               //swap rows and columns to randomize box
+               /***************************************/
+               for (int i = 0; i < shuffled.size(); i++) {
+                       const int randomRow = shuffled[i];
+                       const int originalRow = rows2Swap[i];
+
+                       //remember this swap so restore() can reverse it
+                       restoreIndex[i].a = randomRow;
+                       restoreIndex[i].b = originalRow;
+                       
+                       /* exchange the two rows */
+                       for (int col = 0; col < numSeqs; col++) {
+                               float held = matrix[randomRow][col];
+                               matrix[randomRow][col] = matrix[originalRow][col]; 
+                               matrix[originalRow][col] = held;
+                       }
+                               
+                       /* exchange the two columns */
+                       for (int row = 0; row < numSeqs; row++) {
+                               float held = matrix[row][randomRow];
+                               matrix[row][randomRow] = matrix[row][originalRow]; 
+                               matrix[row][originalRow] = held;
+                       }
+                               
+                       /* exchange the sequence names in the index map */
+                       string heldName = index[randomRow].seqName;
+                       index[randomRow].seqName = index[originalRow].seqName;
+                       index[originalRow].seqName = heldName;
+               }
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+} 
+/**************************************************************************/
+//Undoes the swaps recorded in restoreIndex, newest first, exchanging the same rows,
+//columns and seqName entries again, then empties restoreIndex.
+void FullMatrix::restore(){
+       try{
+               //walk the recorded swaps backwards so they are undone in reverse order
+               it2 = restoreIndex.rbegin();
+               while (it2 != restoreIndex.rend()) {
+                       const int first = it2->second.a;
+                       const int second = it2->second.b;
+
+                       /* exchange the two rows */
+                       for (int col = 0; col < numSeqs; col++) {
+                               float held = matrix[first][col];
+                               matrix[first][col] = matrix[second][col]; 
+                               matrix[second][col] = held;
+                       }
+                       
+                       /* exchange the two columns */
+                       for (int row = 0; row < numSeqs; row++) {
+                               float held = matrix[row][first];
+                               matrix[row][first] = matrix[row][second]; 
+                               matrix[row][second] = held;
+                       }
+                       
+                       /* exchange the sequence names in the index map */
+                       string heldName = index[first].seqName;
+                       index[first].seqName = index[second].seqName;
+                       index[second].seqName = heldName;
+
+                       it2++;
+               }
+
+               //clear restore for next shuffle
+               restoreIndex.clear();
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}  
+/**************************************************************************/
+//Appends to distances every distinct row minimum found across all boxes of the matrix,
+//in ascending order.
+//The matrix is treated as a numGroups x numGroups grid of boxes delimited by bounds;
+//for each matrix row inside each box the smallest off-diagonal value (or 100000.0 if
+//there is none below that) is collected.
+void FullMatrix::getDist(vector<float>& distances) {
+       try{
+               map<float, float> dist;  //holds the distances for the integral form; keyed by value so duplicates collapse
+               map<float, float>::iterator distIt;
+
+               /************************************************************/
+               //collect the row minimums of every "box" in the matrix
+               /************************************************************/
+               //looping over box rows and box columns directly never indexes bounds past numGroups
+               for (int boxRow = 0; boxRow < numGroups; boxRow++) {
+                       int lowBoundx = bounds[boxRow];
+                       int highBoundx = bounds[boxRow+1];
+                       
+                       for (int boxCol = 0; boxCol < numGroups; boxCol++) {
+                               int lowBoundy = bounds[boxCol];
+                               int highBoundy = bounds[boxCol+1];
+                               
+                               //each row in the box
+                               for (int x = lowBoundx; x < highBoundx; x++) {
+                                       float min4Row = 100000.0;
+                                       //each entry in that row
+                                       for (int y = lowBoundy; y < highBoundy; y++) {
+                                               //if you are not on the diagonal and you are less than previous minimum
+                                               if ((x != y) && (matrix[x][y] < min4Row)){
+                                                       min4Row = matrix[x][y];
+                                               }
+                                       }
+                                       //save minimum value 
+                                       dist[min4Row] = min4Row;
+                               }
+                       }
+               }
+
+               //store distances in users vector
+               for (distIt = dist.begin(); distIt != dist.end(); distIt++) {
+                       distances.push_back(distIt->first);
+               }
+               
+       }
+       catch(exception& e) {
+               cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+       catch(...) {
+               cout << "An unknown error has occurred in the FullMatrix class function getDist. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
+               exit(1);
+       }
+}