5 * Created by Sarah Westcott on 3/6/09.
6 * Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
10 #include "fullmatrix.h"
12 /**************************************************************************/
13 //This constructor reads a distance matrix file and stores the data in the matrix.
// Builds the matrix from an open distance-file stream.
// Visible steps: read the sequence count and the first sequence name, size
// `matrix` to numSeqs x numSeqs zeros, look up the first sequence's group,
// probe the stream to choose between the square and lower-triangle layouts,
// hand the rest of the stream to readSquareMatrix / readLTMatrix, and finally
// call sortGroups over the whole index range.
// @param filehandle stream the count, names and distances are extracted from.
// NOTE(review): this listing skips some original lines (the embedded line
// numbers are not contiguous), so the declarations of name/group/d/square,
// the try/catch framing and several closing braces are not visible here.
14 FullMatrix::FullMatrix(ifstream& filehandle) {
// Cache the GlobalData instance and the group map it exposes.
16 globaldata = GlobalData::getInstance();
17 groupmap = globaldata->gGroupmap;
// The stream supplies the sequence count, then the first sequence's name.
20 filehandle >> numSeqs >> name;
22 //make the matrix filled with zeros
23 matrix.resize(numSeqs);
24 for(int i = 0; i < numSeqs; i++) {
25 matrix[i].resize(numSeqs, 0);
// A name for which getGroup answers "not found" is fatal: report it and exit.
28 group = groupmap->getGroup(name);
29 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
// Entry 0 of index records the first sequence's group and name.
30 index[0].groupname = group;
31 index[0].seqName = name;
33 //determine if matrix is square or lower triangle
34 //if it is square read the distances for the first sequence
// NOTE(review): the declaration of d is not shown; if it is a char, the
// comparison of get()'s result with EOF is unreliable - confirm it is an int.
36 while((d=filehandle.get()) != EOF){
38 //is d a number meaning its square
// Return the probed character so the extraction below can consume it.
41 filehandle.putback(d);
// Square layout: the distances of row 0 follow the first name on the same record.
43 for(int i=0;i<numSeqs;i++){
44 filehandle >> matrix[0][i];
49 //is d a line return meaning its lower triangle
// Hand the rest of the stream to the reader that matches the detected layout.
57 if (square == true) { readSquareMatrix(filehandle); }
58 else { readLTMatrix(filehandle); }
60 //sort sequences so they are gathered in groups for processing
61 sortGroups(0, numSeqs-1);
// Diagnostic that uses e.what(); presumably the body of a catch(exception& e)
// handler whose opening line is not shown in this listing.
65 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
69 cout << "An unknown error has occurred in the FullMatrix class function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
73 /**************************************************************************/
// Reads rows 1..numSeqs-1 of a square distance matrix from filehandle.
// For each row the visible code looks up the sequence's group, stores group
// and name in index[i], exits if the group is unknown, and then extracts
// numSeqs distances directly into matrix[i]. Row 0 is not read here: the
// loop starts at i = 1 (the constructor fills matrix[0] for square input).
// @param filehandle stream positioned after the first row of the file.
// NOTE(review): the lines that declare count/name/group, extract `name` from
// the stream and advance `count` are not visible in this listing.
74 void FullMatrix::readSquareMatrix(ifstream& filehandle) {
// Progress reporter sized for numSeqs*numSeqs entries.
// NOTE(review): allocated with new; the matching delete is not visible here -
// confirm it exists. If numSeqs is an int, numSeqs * numSeqs can overflow for
// very large matrices.
78 reading = new Progress("Reading matrix: ", numSeqs * numSeqs);
84 for(int i=1;i<numSeqs;i++){
// index[i] is filled before the "not found" check below; this is harmless
// because the failure path terminates the process.
87 group = groupmap->getGroup(name);
88 index[i].groupname = group;
89 index[i].seqName = name;
91 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
// Every column of the row is present in the square layout.
93 for(int j=0;j<numSeqs;j++){
94 filehandle >> matrix[i][j];
// Report the running entry count to the progress reporter.
97 reading->update(count);
103 catch(exception& e) {
104 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
108 cout << "An unknown error has occurred in the FullMatrix class function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
113 /**************************************************************************/
// Reads rows 1..numSeqs-1 of a lower-triangle distance matrix from filehandle.
// Row i supplies i distances (columns 0..i-1); each value is mirrored into
// matrix[i][j] and matrix[j][i] so the stored matrix is symmetric.
// @param filehandle stream positioned after the first sequence name.
// NOTE(review): the lines that declare count/distance/name/group, extract
// `name` from the stream and advance `count` are not visible in this listing.
114 void FullMatrix::readLTMatrix(ifstream& filehandle) {
// Progress reporter sized for the numSeqs*(numSeqs-1)/2 entries below the diagonal.
// NOTE(review): allocated with new; the matching delete is not visible here -
// confirm it exists.
117 reading = new Progress("Reading matrix: ", numSeqs * (numSeqs - 1) / 2);
124 for(int i=1;i<numSeqs;i++){
// index[i] is filled before the "not found" check below; this is harmless
// because the failure path terminates the process.
127 group = groupmap->getGroup(name);
128 index[i].groupname = group;
129 index[i].seqName = name;
131 if(group == "not found") { cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
// Only the columns left of the diagonal are present in the file.
133 for(int j=0;j<i;j++){
134 filehandle >> distance;
// Mirror the value across the diagonal.
136 matrix[i][j] = distance; matrix[j][i] = distance;
// Report the running entry count to the progress reporter.
138 reading->update(count);
145 catch(exception& e) {
146 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
150 cout << "An unknown error has occurred in the FullMatrix class function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
156 /**************************************************************************/
// Reorders the matrix and the index entries between positions low and high
// by group name. The visible code picks the group name at the middle position
// as the pivot, scans i upward past names smaller than the pivot and j
// downward past names larger than it, then exchanges row i/j, column i/j and
// the groupname/seqName fields of index[i] and index[j].
// @param low  first index position of the range being sorted.
// @param high last index position of the range being sorted.
// NOTE(review): the declarations of i, j, name and the swap temporary, the
// enclosing loop/condition, the second half of each row/column exchange and
// any recursive calls are on lines not visible in this listing.
157 void FullMatrix::sortGroups(int low, int high){
166 //what group does this row belong to
// Pivot: the group name stored at the middle of the range.
167 string z = index[(low + high) / 2].groupname;
171 /* find member above ... */
172 while(index[i].groupname < z) i++;
174 /* find element below ... */
175 while(index[j].groupname > z) j--;
// Row exchange: every column h of row i receives row j's value.
179 for (int h = 0; h < numSeqs; h++) {
181 matrix[i][h] = matrix[j][h];
// Column exchange: every row b of column i receives column j's value.
186 for (int b = 0; b < numSeqs; b++) {
188 matrix[b][i] = matrix[b][j];
// Exchange the group names of the two index entries.
// NOTE(review): z is the pivot and is overwritten here as the swap temporary;
// if the scans above run again after this exchange they compare against the
// overwritten value - verify against the full source.
193 z = index[i].groupname;
194 index[i].groupname = index[j].groupname;
195 index[j].groupname = z;
// Exchange the sequence names of the two index entries.
197 name = index[i].seqName;
198 index[i].seqName = index[j].seqName;
199 index[j].seqName = name;
216 catch(exception& e) {
217 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
221 cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
227 /**************************************************************************/
228 int FullMatrix::getNumSeqs(){ return numSeqs; }
229 /**************************************************************************/
// Writes one header line per row to `out`: the row number followed by the
// group name and sequence name stored in index[i].
// @param out stream that receives the text.
// NOTE(review): the only statement visible inside the inner column loop is
// commented out, so as far as this listing shows no distances are printed;
// lines not visible here may emit more.
231 void FullMatrix::printMatrix(ostream& out) {
233 for (int i = 0; i < numSeqs; i++) {
234 out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
235 for (int j = 0; j < numSeqs; j++) {
236 //out << matrix[i][j] << " ";
241 catch(exception& e) {
242 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
246 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
252 /**************************************************************************/
253 void FullMatrix::setBounds(){
255 numGroups = globaldata->gGroupmap->namesOfGroups.size();
257 //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
258 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
260 //one for each comparision
261 //minsForRows.resize(numGroups*numGroups);
263 /*************************************************/
264 //find where in matrix each group starts and stops
265 /*************************************************/
266 bounds.resize(numGroups);
269 bounds[numGroups] = numSeqs;
271 //for each group find bounds of subgroup/comparison
272 for (int i = 1; i < numGroups; i++) {
273 getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
277 catch(exception& e) {
278 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
282 cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
287 /**************************************************************************/
// For the box numbered x, finds the smallest off-diagonal distance in each of
// the box's rows and appends those minima to minsForRows.
// A box is the sub-matrix whose rows span [lowBoundx, highBoundx) and whose
// columns span [lowBoundy, highBoundy), both taken from `bounds`.
// @param x number of the box to process.
// NOTE(review): the declaration and advancing of count/countx/county, any
// reset of minsForRows and the return statement are on lines not visible in
// this listing.
288 vector<float> FullMatrix::getMins(int x) {
293 /************************************************************/
294 //fill the minsForRows vector for the box the user wants
295 /************************************************************/
297 int lowBoundx = bounds[0]; //where first group starts
298 int lowBoundy = bounds[0];
299 int highBoundx = bounds[1]; //where second group starts
300 int highBoundy = bounds[1];
302 int countx = 1; //index in bound
303 int county = 1; //index in bound
305 //find the bounds for the box the user wants
// Walks the boxes (at most numGroups*numGroups of them) until the requested one is reached.
306 for (int i = 0; i < (numGroups * numGroups); i++) {
308 //are you at the box?
309 if (count == x) { break; }
// More boxes remain in this row of boxes: move the column bounds along.
313 if (county < numGroups) {
315 highBoundy = bounds[county];
316 lowBoundy = bounds[county-1];
317 }else{ //you are moving to a new row of "boxes"
320 highBoundx = bounds[countx];
321 lowBoundx = bounds[countx-1];
322 highBoundy = bounds[county];
323 lowBoundy = bounds[county-1];
327 //each row in the box
// NOTE(review): this loop variable shadows the parameter x; the parameter is
// only read in the search loop above.
328 for (int x = lowBoundx; x < highBoundx; x++) {
// Sentinel larger than any expected distance; as far as visible it is stored
// unchanged when the row has no off-diagonal entry in the box.
329 float min4Row = 100000.0;
330 //each entry in that row
331 for (int y = lowBoundy; y < highBoundy; y++) {
332 //if you are not on the diagonal and you are less than previous minimum
333 if ((x != y) && (matrix[x][y] < min4Row)) {
334 min4Row = matrix[x][y];
337 //save minimum value for that row in minsForRows vector of vectors
338 minsForRows.push_back(min4Row);
343 catch(exception& e) {
344 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
348 cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
352 /**************************************************************************/
// Scans index in iteration order and reports where the run of entries
// belonging to `group` ends.
// @param higher receives the key (it->first) of the first entry whose group
//               differs from `group` once gotLower is true; it is not
//               assigned if no such entry is reached.
// @param group  group name to look for.
// NOTE(review): the statement executed when an entry matches `group`
// (presumably setting gotLower) is on a line not visible in this listing;
// `it` is not declared in the visible lines.
353 void FullMatrix::getBounds(int& higher, string group) {
355 bool gotLower = false;
357 //for each group find bounds of subgroup/comparison
358 for (it = index.begin(); it != index.end(); it++) {
359 if (it->second.groupname == group) {
// First entry of a different group after the run: record its key and stop.
361 }else if ((gotLower == true) && (it->second.groupname != group)) { higher = it->first; break; }
365 catch(exception& e) {
366 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
370 cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
376 /**************************************************************************/
// Writes every value held in minsForRows to `out`, each followed by a space.
// @param out stream that receives the text.
// NOTE(review): lines between the loop and the handlers are not visible in
// this listing, so any trailing output (e.g. a newline) cannot be confirmed.
378 void FullMatrix::printMinsForRows(ostream& out) {
// NOTE(review): int j is compared with the unsigned result of size().
380 for (int j = 0; j < minsForRows.size(); j++) {
381 out << minsForRows[j] << " ";
386 catch(exception& e) {
// NOTE(review): the two messages below name printMatrix although this is
// printMinsForRows - looks like a copy/paste leftover.
387 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
391 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
396 /**************************************************************************/
397 //shuffles the sequences in the 2 groups passed in.
// Randomly permutes the sequences that fall inside the column range of the
// requested box. The visible code locates [lowBoundy, highBoundy) for `box`,
// lists those positions twice (rows2Swap in order, shuffled in random order),
// and for each pair exchanges the two matrix rows, the two matrix columns and
// the two seqName entries of index. Each pair is recorded in restoreIndex.
// Group names in index are not touched by the visible code.
// @param box number of the box whose sequences are shuffled.
// NOTE(review): the declarations of count, y and name, and the advancing of
// count/county, are on lines not visible in this listing.
398 void FullMatrix::shuffle(int box){
400 vector<int> rows2Swap;
401 vector<int> shuffled;
405 /****************************/
406 //find the box the user wants
407 /****************************/
409 int lowBoundy = bounds[0]; //where first group starts
410 int highBoundy = bounds[1]; //where second group starts
411 int county = 1; //index in bound
413 //find the bounds for the box the user wants
414 for (int i = 0; i < (numGroups * numGroups); i++) {
416 //are you at the box?
417 if (count == box) { break; }
421 if (county < numGroups) {
423 highBoundy = bounds[county];
424 lowBoundy = bounds[county-1];
425 }else{ //you are moving to a new row of "boxes"
427 highBoundy = bounds[county];
428 lowBoundy = bounds[county-1];
432 /************************/
433 //save its rows locations
434 /************************/
435 //go through the matrix map to find the rows from groups you want to randomize
// Both vectors start as the same ordered list of positions.
436 for (int y = lowBoundy; y < highBoundy; y++) {
437 rows2Swap.push_back(y);
438 shuffled.push_back(y);
441 //randomize rows to shuffle in shuffled
// NOTE(review): std::random_shuffle was deprecated in C++14 and removed in
// C++17; std::shuffle with an explicit engine is the replacement.
442 random_shuffle(shuffled.begin(), shuffled.end());
444 /***************************************/
445 //swap rows and columns to randomize box
446 /***************************************/
// NOTE(review): int i is compared with the unsigned result of size().
447 for (int i = 0; i < shuffled.size(); i++) {
448 //record the swaps you are making so you can undo them in restore function
// Keyed by the step number i so the swaps can later be replayed in reverse order.
449 restoreIndex[i].a = shuffled[i];
450 restoreIndex[i].b = rows2Swap[i];
// Exchange the two rows, element by element, through the temporary y.
453 for (int h = 0; h < numSeqs; h++) {
454 y = matrix[shuffled[i]][h];
455 matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h];
456 matrix[rows2Swap[i]][h] = y;
// Exchange the two columns the same way.
460 for (int b = 0; b < numSeqs; b++) {
461 y = matrix[b][shuffled[i]];
462 matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]];
463 matrix[b][rows2Swap[i]] = y;
// Keep the sequence names aligned with the moved rows/columns.
467 name = index[shuffled[i]].seqName;
468 index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
469 index[rows2Swap[i]].seqName = name;
472 catch(exception& e) {
473 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
477 cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
481 /**************************************************************************/
482 //unshuffles the matrix.
// Undoes the exchanges recorded in restoreIndex. The entries are visited from
// last to first, and for each one the rows, columns and seqName entries at
// positions a and b are exchanged again. restoreIndex is emptied afterwards.
// NOTE(review): the declarations of it2, y and name are on lines not visible
// in this listing.
483 void FullMatrix::restore(){
488 //reverse iterate through swaps and undo them to restore original matrix and index map.
489 for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
// Exchange rows a and b, element by element, through the temporary y.
491 for (int h = 0; h < numSeqs; h++) {
492 y = matrix[it2->second.a][h];
493 matrix[it2->second.a][h] = matrix[it2->second.b][h];
494 matrix[it2->second.b][h] = y;
// Exchange columns a and b the same way.
498 for (int b = 0; b < numSeqs; b++) {
499 y = matrix[b][it2->second.a];
500 matrix[b][it2->second.a] = matrix[b][it2->second.b];
501 matrix[b][it2->second.b] = y;
// Put the sequence names back alongside their rows/columns.
506 name = index[it2->second.a].seqName;
507 index[it2->second.a].seqName = index[it2->second.b].seqName;
508 index[it2->second.b].seqName = name;
511 //clear restore for next shuffle
512 restoreIndex.clear();
514 catch(exception& e) {
515 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
// Diagnostic for any other failure; presumably the catch(...) handler (line not shown).
519 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
523 /**************************************************************************/
// Collects the distinct per-row minimum distances over every box of the
// matrix and appends them to `distances`.
// For each box, each row's smallest off-diagonal value is stored as a key of
// the local map `dist`, so equal minima collapse into one entry. The keys are
// then appended to `distances` in the map's iteration order (ascending).
// @param distances output vector; values are appended, existing content is kept.
// NOTE(review): the advancing of countx/county is on lines not visible in
// this listing.
524 void FullMatrix::getDist(vector<float>& distances) {
526 map<float, float> dist; //holds the distances for the integral form
527 map<float, float>::iterator it;
529 /************************************************************/
// NOTE(review): the banner below mentions minsForRows, but the visible code
// in this function stores the minima in dist, not in minsForRows.
530 //fill the minsForRows vectors for each group the user wants
531 /************************************************************/
532 int lowBoundx = bounds[0]; //where first group starts
533 int lowBoundy = bounds[0];
534 int highBoundx = bounds[1]; //where second group starts
535 int highBoundy = bounds[1];
537 int countx = 1; //index in bound
538 int county = 1; //index in bound
540 //go through each "box" in the matrix
541 for (int i = 0; i < (numGroups * numGroups); i++) {
542 //each row in the box
543 for (int x = lowBoundx; x < highBoundx; x++) {
// Sentinel larger than any expected distance; as far as visible it is stored
// unchanged when the row has no off-diagonal entry in the box.
544 float min4Row = 100000.0;
545 //each entry in that row
546 for (int y = lowBoundy; y < highBoundy; y++) {
547 //if you are not on the diagonal and you are less than previous minimum
548 if ((x != y) && (matrix[x][y] < min4Row)){
549 min4Row = matrix[x][y];
// Key and value are the same number; the map is used to keep unique, ordered minima.
553 dist[min4Row] = min4Row;
556 //****** reset bounds to process next "box" ********
557 //if you still have more "boxes" in that row
558 if (county < numGroups) {
560 highBoundy = bounds[county];
561 lowBoundy = bounds[county-1];
562 }else{ //you are moving to a new row of "boxes"
565 highBoundx = bounds[countx];
566 lowBoundx = bounds[countx-1];
567 highBoundy = bounds[county];
568 lowBoundy = bounds[county-1];
572 //store distances in users vector
573 for (it = dist.begin(); it != dist.end(); it++) {
574 distances.push_back(it->first);
578 catch(exception& e) {
// NOTE(review): the two messages below name restore although this is
// getDist - looks like a copy/paste leftover.
579 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
583 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";