5 * Created by Sarah Westcott on 3/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "fullmatrix.h"
12 /**************************************************************************/
13 //This constructor reads a distance matrix file and stores the data in the matrix.
// Builds the matrix from an already-open distance-matrix stream.
// Steps visible in this block: read numSeqs and the first sequence name,
// size the matrix to numSeqs x numSeqs filled with zeros, look up the first
// sequence's group (printing an error and calling exit(1) if the lookup
// returns "not found"), probe the stream to decide between the square and
// lower-triangle layouts, delegate to readSquareMatrix or readLTMatrix, and
// finally call sortGroups over rows 0..numSeqs-1.
// NOTE(review): only part of this function is visible here; the declarations
// of name, group, d and square, the branch conditions inside the while loop,
// and the try/catch scaffolding are on lines not shown.
14 FullMatrix::FullMatrix(ifstream& filehandle) {
16 globaldata = GlobalData::getInstance();
17 groupmap = globaldata->gGroupmap;
// first two tokens of the stream: the sequence count, then the first sequence's name
// NOTE(review): the extraction result is not checked in the lines shown, so a
// malformed header would leave numSeqs unvalidated before the resize below.
20 filehandle >> numSeqs >> name;
22 //make the matrix filled with zeros
23 matrix.resize(numSeqs);
24 for(int i = 0; i < numSeqs; i++) {
25 matrix[i].resize(numSeqs, 0);
// row 0 is labelled here; rows 1..numSeqs-1 are labelled by the read* helpers
28 group = groupmap->getGroup(name);
29 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
30 index[0].groupname = group;
31 index[0].seqName = name;
33 //determine if matrix is square or lower triangle
34 //if it is square read the distances for the first sequence
// NOTE(review): istream::get() returns an int; if d is declared as char the
// comparison with EOF is unreliable -- confirm d's declaration.
36 while((d=filehandle.get()) != EOF){
38 //is d a number meaning its square
// push the probed character back so the numeric extraction below sees the whole value
41 filehandle.putback(d);
43 for(int i=0;i<numSeqs;i++){
44 filehandle >> matrix[0][i];
49 //is d a line return meaning its lower triangle
// the remaining rows are read by the helper that matches the detected layout
57 if (square == true) { readSquareMatrix(filehandle); }
58 else { readLTMatrix(filehandle); }
60 //sort sequences so they are gathered in groups for processing
61 sortGroups(0, numSeqs-1);
65 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
69 cout << "An unknown error has occurred in the FullMatrix class function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73 /**************************************************************************/
// Reads rows 1..numSeqs-1 of a square distance matrix from filehandle.
// For each row it looks up the sequence's group, records the group and name in
// index[i], and then reads numSeqs distances into matrix[i]. Row 0 is not read
// here: the constructor fills matrix[0] before calling this function.
// Prints an error and calls exit(1) if a sequence's group lookup returns "not found".
// NOTE(review): the lines that declare name/group/count and that read each
// row's name from the stream are not visible here.
74 void FullMatrix::readSquareMatrix(ifstream& filehandle) {
// progress total is the full cell count
// NOTE(review): allocated with new; no matching delete is visible in the
// lines shown -- confirm it is released.
// NOTE(review): getNumSeqs() returns int, so this product is presumably an int
// multiplication and would overflow for very large matrices -- confirm.
78 reading = new Progress("Reading matrix: ", numSeqs * numSeqs);
84 for(int i=1;i<numSeqs;i++){
87 group = groupmap->getGroup(name);
// index[i] is written before the "not found" check; harmless because that branch exits
88 index[i].groupname = group;
89 index[i].seqName = name;
91 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
93 for(int j=0;j<numSeqs;j++){
94 filehandle >> matrix[i][j];
97 reading->update(count);
103 catch(exception& e) {
104 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
108 cout << "An unknown error has occurred in the FullMatrix class function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113 /**************************************************************************/
// Reads rows 1..numSeqs-1 of a lower-triangle distance matrix from filehandle.
// Row i supplies i distances (columns 0..i-1); each value is stored in both
// matrix[i][j] and matrix[j][i], so the in-memory matrix ends up symmetric.
// Each row's group and name are recorded in index[i].
// Prints an error and calls exit(1) if a sequence's group lookup returns "not found".
// NOTE(review): the lines that declare name/group/distance/count and that read
// each row's name from the stream are not visible here.
114 void FullMatrix::readLTMatrix(ifstream& filehandle) {
// progress total is the number of cells below the diagonal
// NOTE(review): allocated with new; no matching delete is visible in the
// lines shown -- confirm it is released.
117 reading = new Progress("Reading matrix: ", numSeqs * (numSeqs - 1) / 2);
124 for(int i=1;i<numSeqs;i++){
127 group = groupmap->getGroup(name);
// index[i] is written before the "not found" check; harmless because that branch exits
128 index[i].groupname = group;
129 index[i].seqName = name;
131 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
133 for(int j=0;j<i;j++){
134 filehandle >> distance;
// mirror the value across the diagonal
136 matrix[i][j] = distance; matrix[j][i] = distance;
138 reading->update(count);
145 catch(exception& e) {
146 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
150 cout << "An unknown error has occurred in the FullMatrix class function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
156 /**************************************************************************/
// Reorders rows/columns of the matrix, and the matching index entries, by
// group name for the row range low..high. The visible lines pick the group
// name at the midpoint as the comparison value, advance i past smaller group
// names and j past larger ones, then exchange row i with row j, column i with
// column j, and the two index entries.
// NOTE(review): the enclosing loop, the i/j initialisation, the temporaries
// used for the matrix swaps, and any recursive calls are on lines not shown.
157 void FullMatrix::sortGroups(int low, int high){
166 //what group does this row belong to
167 string z = index[(low + high) / 2].groupname;
171 /* find member above ... */
172 while(index[i].groupname < z) i++;
174 /* find element below ... */
175 while(index[j].groupname > z) j--;
// row exchange between i and j (the save/restore halves are on lines not shown)
// NOTE(review): the temporary's type is not visible; it must be floating-point
// or the distances will be truncated during the swap -- confirm.
179 for (int h = 0; h < numSeqs; h++) {
181 matrix[i][h] = matrix[j][h];
// column exchange between i and j
186 for (int b = 0; b < numSeqs; b++) {
188 matrix[b][i] = matrix[b][j];
// NOTE(review): z holds the comparison value chosen above and is overwritten
// here as a swap temporary. If the scans above run again after this point they
// compare against the overwritten value -- verify against the full loop.
193 z = index[i].groupname;
194 index[i].groupname = index[j].groupname;
195 index[j].groupname = z;
197 name = index[i].seqName;
198 index[i].seqName = index[j].seqName;
199 index[j].seqName = name;
216 catch(exception& e) {
217 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
221 cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227 /**************************************************************************/
228 int FullMatrix::getNumSeqs(){ return numSeqs; }
229 /**************************************************************************/
// Writes the matrix to out. For every row it prints a label line containing
// the row number, group name and sequence name, followed by that row's
// numSeqs values separated by spaces.
// NOTE(review): any line break emitted after the values, and the try/catch
// scaffolding, are on lines not shown here.
231 void FullMatrix::printMatrix(ostream& out) {
233 for (int i = 0; i < numSeqs; i++) {
234 out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
235 for (int j = 0; j < numSeqs; j++) {
236 out << matrix[i][j] << " ";
241 catch(exception& e) {
242 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
246 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
252 /**************************************************************************/
253 void FullMatrix::setBounds(){
255 numGroups = globaldata->gGroupmap->namesOfGroups.size();
257 //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
258 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
260 //one for each comparision
261 //minsForRows.resize(numGroups*numGroups);
263 /*************************************************/
264 //find where in matrix each group starts and stops
265 /*************************************************/
266 bounds.resize(numGroups);
269 bounds[numGroups] = numSeqs;
271 //for each group find bounds of subgroup/comparison
272 for (int i = 1; i < numGroups; i++) {
273 getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
277 catch(exception& e) {
278 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282 cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287 /**************************************************************************/
// For one group-versus-group "box" of the matrix, finds the smallest
// off-diagonal value in each row of the box and appends it to minsForRows.
// The parameter x selects the box: starting from the first box, the row and
// column bounds are advanced one box at a time until the running counter
// `count` equals x.
// NOTE(review): the declaration and updates of count, the increments of
// countx/county, the return statement, and the try/catch scaffolding are on
// lines not shown; presumably minsForRows is what gets returned -- confirm.
288 vector<float> FullMatrix::getMins(int x) {
293 /************************************************************/
294 //fill the minsForRows vector for the box the user wants
295 /************************************************************/
297 int lowBoundx = bounds[0]; //where first group starts
298 int lowBoundy = bounds[0];
299 int highBoundx = bounds[1]; //where second group starts
300 int highBoundy = bounds[1];
302 int countx = 1; //index in bound
303 int county = 1; //index in bound
305 //find the bounds for the box the user wants
306 for (int i = 0; i < (numGroups * numGroups); i++) {
308 //are you at the box?
309 if (count == x) { break; }
// still inside the current row of boxes: only the column bounds change
313 if (county < numGroups) {
315 highBoundy = bounds[county];
316 lowBoundy = bounds[county-1];
317 }else{ //you are moving to a new row of "boxes"
320 highBoundx = bounds[countx];
321 lowBoundx = bounds[countx-1];
322 highBoundy = bounds[county];
323 lowBoundy = bounds[county-1];
327 //each row in the box
// NOTE(review): this loop variable shadows the parameter x.
328 for (int x = lowBoundx; x < highBoundx; x++) {
// 100000.0 is the starting value for the running minimum; it is what gets
// stored if no off-diagonal entry in the row is smaller
329 float min4Row = 100000.0;
330 //each entry in that row
331 for (int y = lowBoundy; y < highBoundy; y++) {
332 //if you are not on the diagonal and you are less than previous minimum
333 if ((x != y) && (matrix[x][y] < min4Row)) {
334 min4Row = matrix[x][y];
337 //save minimum value for that row in minsForRows vector of vectors
338 minsForRows.push_back(min4Row);
343 catch(exception& e) {
344 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
348 cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
352 /**************************************************************************/
// Walks index from begin() to end() looking for the entries whose groupname
// equals `group`. Once gotLower is true, the first entry with a different
// groupname has its key written to `higher` and the scan stops.
// In the lines shown, `higher` is assigned only in that else-if branch.
// NOTE(review): the body of the matching branch (presumably where gotLower is
// set to true) and the declaration of `it` are on lines not shown -- confirm.
353 void FullMatrix::getBounds(int& higher, string group) {
355 bool gotLower = false;
357 //for each group find bounds of subgroup/comparison
358 for (it = index.begin(); it != index.end(); it++) {
359 if (it->second.groupname == group) {
361 }else if ((gotLower == true) && (it->second.groupname != group)) { higher = it->first; break; }
365 catch(exception& e) {
366 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
370 cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
376 /**************************************************************************/
// Writes every value currently held in minsForRows to out, separated by spaces.
// NOTE(review): any trailing line break and the try/catch scaffolding are on
// lines not shown here.
378 void FullMatrix::printMinsForRows(ostream& out) {
// NOTE(review): j is a signed int compared against an unsigned size().
380 for (int j = 0; j < minsForRows.size(); j++) {
381 out << minsForRows[j] << " ";
386 catch(exception& e) {
// NOTE(review): both messages below name printMatrix rather than printMinsForRows.
387 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
391 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
397 /**************************************************************************/
398 //shuffles the sequences in the 2 groups passed in.
// Randomly permutes the sequences that belong to groupA or groupB.
// Collects the index keys of every row in either group, shuffles a copy of
// that list, and then for each position swaps the matrix rows, the matrix
// columns and the seqName entries of the original and shuffled keys. Each swap
// is recorded in restoreIndex so that restore() can undo it.
// In the lines shown only seqName is exchanged in index, not groupname.
// NOTE(review): the declarations of it, y and name, and the try/catch
// scaffolding, are on lines not shown here.
399 void FullMatrix::shuffle(string groupA, string groupB){
401 vector<int> rows2Swap;
402 vector<int> shuffled;
407 /********************************/
408 //save rows you want to randomize
409 /********************************/
410 //go through the matrix map to find the rows from groups you want to randomize
411 for (it = index.begin(); it != index.end(); it++) {
412 //is this row from group A or B?
413 if ((it->second.groupname == groupA) || (it->second.groupname == groupB)) {
414 rows2Swap.push_back(it->first);
415 shuffled.push_back(it->first);
419 //randomize rows to shuffle in shuffled
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; std::shuffle with an explicit engine is the replacement.
420 random_shuffle(shuffled.begin(), shuffled.end());
422 /***************************************/
423 //swap rows and columns to randomize box
424 /***************************************/
425 for (int i = 0; i < shuffled.size(); i++) {
427 //record the swaps you are making so you can undo them in restore function
428 restoreIndex[i].a = shuffled[i];
429 restoreIndex[i].b = rows2Swap[i];
// exchange the two rows
// NOTE(review): y's declaration is not visible; it must be floating-point or
// the distances will be truncated during the swap -- confirm.
432 for (int h = 0; h < numSeqs; h++) {
433 y = matrix[shuffled[i]][h];
434 matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h];
435 matrix[rows2Swap[i]][h] = y;
// exchange the two columns
439 for (int b = 0; b < numSeqs; b++) {
440 y = matrix[b][shuffled[i]];
441 matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]];
442 matrix[b][rows2Swap[i]] = y;
// exchange the sequence names so index keeps describing the moved rows
446 name = index[shuffled[i]].seqName;
447 index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
448 index[rows2Swap[i]].seqName = name;
452 catch(exception& e) {
453 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
457 cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
461 /**************************************************************************/
462 //unshuffles the matrix.
// Undoes the swaps recorded in restoreIndex. The entries are visited from
// rbegin() to rend(), and for each one the matrix rows, matrix columns and
// seqName entries of keys a and b are exchanged again. restoreIndex is cleared
// afterwards so the next shuffle starts with an empty record.
// NOTE(review): the declarations of it2, y and name, and the try/catch
// scaffolding, are on lines not shown here.
463 void FullMatrix::restore(){
468 //reverse iterate through swaps and undo them to restore original matrix and index map.
469 for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
// exchange the two rows back
// NOTE(review): y's declaration is not visible; it must be floating-point or
// the distances will be truncated during the swap -- confirm.
472 for (int h = 0; h < numSeqs; h++) {
473 y = matrix[it2->second.a][h];
474 matrix[it2->second.a][h] = matrix[it2->second.b][h];
475 matrix[it2->second.b][h] = y;
// exchange the two columns back
479 for (int b = 0; b < numSeqs; b++) {
480 y = matrix[b][it2->second.a];
481 matrix[b][it2->second.a] = matrix[b][it2->second.b];
482 matrix[b][it2->second.b] = y;
// exchange the sequence names back
487 name = index[it2->second.a].seqName;
488 index[it2->second.a].seqName = index[it2->second.b].seqName;
489 index[it2->second.b].seqName = name;
493 //clear restore for next shuffle
494 restoreIndex.clear();
496 catch(exception& e) {
497 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
501 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
505 /**************************************************************************/
// Collects the distinct per-row minimum distances across every
// group-versus-group "box" of the matrix and appends them to `distances`.
// Each row minimum is stored as a key in a std::map, so duplicates collapse
// and the values are appended in ascending key order. `distances` is only
// appended to in the lines shown; it is not cleared first.
// NOTE(review): the increments of countx/county and the try/catch scaffolding
// are on lines not shown here.
506 void FullMatrix::getDist(vector<float>& distances) {
// used as a sorted set: key and value are always the same number
508 map<float, float> dist; //holds the distances for the integral form
509 map<float, float>::iterator it;
511 /************************************************************/
// NOTE(review): the comment below mentions minsForRows, but this function
// stores its results in dist and never touches minsForRows in the lines shown.
512 //fill the minsForRows vectors for each group the user wants
513 /************************************************************/
514 int lowBoundx = bounds[0]; //where first group starts
515 int lowBoundy = bounds[0];
516 int highBoundx = bounds[1]; //where second group starts
517 int highBoundy = bounds[1];
519 int countx = 1; //index in bound
520 int county = 1; //index in bound
522 //go through each "box" in the matrix
523 for (int i = 0; i < (numGroups * numGroups); i++) {
524 //each row in the box
525 for (int x = lowBoundx; x < highBoundx; x++) {
// 100000.0 is the starting value for the running minimum; it is what gets
// recorded if no off-diagonal entry in the row is smaller
526 float min4Row = 100000.0;
527 //each entry in that row
528 for (int y = lowBoundy; y < highBoundy; y++) {
529 //if you are not on the diagonal and you are less than previous minimum
530 if ((x != y) && (matrix[x][y] < min4Row)){
531 min4Row = matrix[x][y];
535 dist[min4Row] = min4Row;
538 //****** reset bounds to process next "box" ********
539 //if you still have more "boxes" in that row
540 if (county < numGroups) {
542 highBoundy = bounds[county];
543 lowBoundy = bounds[county-1];
544 }else{ //you are moving to a new row of "boxes"
547 highBoundx = bounds[countx];
548 lowBoundx = bounds[countx-1];
549 highBoundy = bounds[county];
550 lowBoundy = bounds[county-1];
554 //store distances in users vector
555 for (it = dist.begin(); it != dist.end(); it++) {
556 distances.push_back(it->first);
560 catch(exception& e) {
// NOTE(review): both messages below name restore rather than getDist.
561 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
565 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";