]> git.donarmstrong.com Git - mothur.git/blob - fullmatrix.cpp
working on libshuff
[mothur.git] / fullmatrix.cpp
1 /*
2  *  fullmatrix.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "fullmatrix.h"
11
12 /**************************************************************************/
13 //This constructor reads a distance matrix file and stores the data in the matrix.
14 FullMatrix::FullMatrix(ifstream& filehandle) {
15         try{
16                 globaldata = GlobalData::getInstance();
17                 groupmap = globaldata->gGroupmap;
18                 
19                 string name, group;
20                 filehandle >> numSeqs >> name;
21                 
22                 //make the matrix filled with zeros
23                 matrix.resize(numSeqs); 
24                 for(int i = 0; i < numSeqs; i++) {
25                         matrix[i].resize(numSeqs, 0);
26                 }
27                 
28                 group = groupmap->getGroup(name);
29                 if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
30                 index[0].groupname = group; 
31                 index[0].seqName = name;
32                 
33                 //determine if matrix is square or lower triangle
34                 //if it is square read the distances for the first sequence
35                 char d;
36                 while((d=filehandle.get()) != EOF){
37                         
38                         //is d a number meaning its square
39                         if(isalnum(d)){ 
40                                 square = true;
41                                 filehandle.putback(d);
42                                 
43                                 for(int i=0;i<numSeqs;i++){
44                                         filehandle >> matrix[0][i];
45                                 }
46                                 break;
47                         }
48                         
49                         //is d a line return meaning its lower triangle
50                         if(d == '\n'){
51                                 square = false;
52                                 break;
53                         }
54                 }
55                 
56                 //read rest of matrix
57                 if (square == true) { readSquareMatrix(filehandle); }
58                 else { readLTMatrix(filehandle); }
59                 
60                 //sort sequences so they are gathered in groups for processing
61                 sortGroups(0, numSeqs-1);
62                         
63         }
64         catch(exception& e) {
65                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66                 exit(1);
67         }
68         catch(...) {
69                 cout << "An unknown error has occurred in the FullMatrix class function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }
72 }
73 /**************************************************************************/
74 void FullMatrix::readSquareMatrix(ifstream& filehandle) {
75         try {
76         
77                 Progress* reading;
78                 reading = new Progress("Reading matrix:    ", numSeqs * numSeqs);
79                 
80                 int count = 0;
81                 
82                 string group, name;
83                 
84                 for(int i=1;i<numSeqs;i++){
85                         filehandle >> name;             
86                         
87                         group = groupmap->getGroup(name);
88                         index[i].groupname = group;
89                         index[i].seqName = name;
90                         
91                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
92                                 
93                         for(int j=0;j<numSeqs;j++){
94                                 filehandle >> matrix[i][j];
95                                 
96                                 count++;
97                                 reading->update(count);
98                         }
99                 }
100                 reading->finish();
101                 delete reading;
102         }
103         catch(exception& e) {
104                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }
107         catch(...) {
108                 cout << "An unknown error has occurred in the FullMatrix class function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109                 exit(1);
110         }
111
112
113 /**************************************************************************/
114 void FullMatrix::readLTMatrix(ifstream& filehandle) {
115         try {
116                 Progress* reading;
117                 reading = new Progress("Reading matrix:    ", numSeqs * (numSeqs - 1) / 2);
118                 
119                 int count = 0;
120                 float distance;
121
122                 string group, name;
123                 
124                 for(int i=1;i<numSeqs;i++){
125                         filehandle >> name;             
126                                                 
127                         group = groupmap->getGroup(name);
128                         index[i].groupname = group;
129                         index[i].seqName = name;
130         
131                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl;  exit(1); }
132                                 
133                         for(int j=0;j<i;j++){
134                                 filehandle >> distance;
135                                         
136                                 matrix[i][j] = distance;  matrix[j][i] = distance;
137                                 count++;
138                                 reading->update(count);
139                         }
140                         
141                 }
142                 reading->finish();
143                 delete reading;
144         }
145         catch(exception& e) {
146                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147                 exit(1);
148         }
149         catch(...) {
150                 cout << "An unknown error has occurred in the FullMatrix class function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
151                 exit(1);
152         }
153
154 }
155
156 /**************************************************************************/
157 void FullMatrix::sortGroups(int low, int high){
158         try{
159         
160                 int i = low;
161                 int j = high;
162                 float y = 0;
163                 string name;
164                 
165                 /* compare value */
166                 //what group does this row belong to
167                 string z = index[(low + high) / 2].groupname;
168
169                 /* partition */
170                 do {
171                         /* find member above ... */
172                         while(index[i].groupname < z) i++;
173
174                         /* find element below ... */
175                         while(index[j].groupname > z) j--;
176                         
177                         if(i <= j) {
178                                 /* swap rows*/
179                                 for (int h = 0; h < numSeqs; h++) {
180                                         y = matrix[i][h];
181                                         matrix[i][h] = matrix[j][h]; 
182                                         matrix[j][h] = y;
183                                 }
184                                 
185                                 /* swap columns*/
186                                 for (int b = 0; b < numSeqs; b++) {
187                                         y = matrix[b][i];
188                                         matrix[b][i] = matrix[b][j]; 
189                                         matrix[b][j] = y;
190                                 }
191                                 
192                                 //swap map elements
193                                 z = index[i].groupname;
194                                 index[i].groupname = index[j].groupname;
195                                 index[j].groupname = z;
196                                 
197                                 name = index[i].seqName;
198                                 index[i].seqName = index[j].seqName;
199                                 index[j].seqName = name;
200
201                                 
202                                 i++; 
203                                 j--;
204                         }
205                 } while(i <= j);
206
207                 /* recurse */
208                 if(low < j) 
209                 sortGroups(low, j);
210
211                 if(i < high) 
212                 sortGroups(i, high); 
213
214         
215         }
216         catch(exception& e) {
217                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }
220         catch(...) {
221                 cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
222                 exit(1);
223         }
224
225 }
226
227 /**************************************************************************/    
228 int FullMatrix::getNumSeqs(){ return numSeqs; }
229 /**************************************************************************/
230 //print out matrix
231 void FullMatrix::printMatrix(ostream& out) {
232         try{
233                 for (int i = 0; i < numSeqs; i++) {
234                         out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
235                         for (int j = 0; j < numSeqs; j++) {
236                                 //out << matrix[i][j] << " ";
237                         }
238                         out << endl;
239                 }
240         }
241         catch(exception& e) {
242                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
243                 exit(1);
244         }
245         catch(...) {
246                 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
247                 exit(1);
248         }
249
250 }
251
252 /**************************************************************************/
253 void FullMatrix::setBounds(){
254         try{
255                 numGroups = globaldata->gGroupmap->namesOfGroups.size();
256                 
257                 //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
258                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
259                 
260                 //one for each comparision
261                 //minsForRows.resize(numGroups*numGroups);
262                 
263                 /*************************************************/
264                 //find where in matrix each group starts and stops
265                 /*************************************************/
266                 bounds.resize(numGroups);
267                 
268                 bounds[0] = 0;
269                 bounds[numGroups] = numSeqs;
270
271                 //for each group find bounds of subgroup/comparison
272                 for (int i = 1; i < numGroups; i++) {
273                         getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
274                 }
275                 
276         }
277         catch(exception& e) {
278                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }
281         catch(...) {
282                 cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283                 exit(1);
284         }
285
286 }
287 /**************************************************************************/
288 vector<float> FullMatrix::getMins(int x) {
289         try{    
290                 //clear out old data
291                 minsForRows.clear();
292                 
293                 /************************************************************/
294                 //fill the minsForRows vector for the box the user wants
295                 /************************************************************/
296                 int count = 0;
297                 int lowBoundx = bounds[0]; //where first group starts
298                 int lowBoundy = bounds[0]; 
299                 int highBoundx = bounds[1]; //where second group starts
300                 int highBoundy = bounds[1]; 
301                 
302                 int countx = 1;  //index in bound
303                 int county = 1; //index in bound
304                 
305                 //find the bounds for the box the user wants
306                 for (int i = 0; i < (numGroups * numGroups); i++) {
307                 
308                         //are you at the box?
309                         if (count == x) { break; }
310                         else { count++; }
311                         
312                         //move to next box
313                         if (county < numGroups) {
314                                 county++;
315                                 highBoundy = bounds[county];
316                                 lowBoundy = bounds[county-1];
317                         }else{ //you are moving to a new row of "boxes"
318                                 county = 1;
319                                 countx++;
320                                 highBoundx = bounds[countx];
321                                 lowBoundx = bounds[countx-1];
322                                 highBoundy = bounds[county];
323                                 lowBoundy = bounds[county-1];
324                         }
325                 }
326                                 
327                 //each row in the box
328                 for (int x = lowBoundx; x < highBoundx; x++) {
329                         float min4Row = 100000.0;
330                         //each entry in that row
331                         for (int y = lowBoundy; y < highBoundy; y++) {
332                                 //if you are not on the diagonal and you are less than previous minimum
333                                 if ((x != y) && (matrix[x][y] < min4Row)) {
334                                         min4Row = matrix[x][y];
335                                 }
336                         }
337                         //save minimum value for that row in minsForRows vector of vectors
338                         minsForRows.push_back(min4Row);
339                 }
340                         
341                 return minsForRows;
342         }
343         catch(exception& e) {
344                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }
347         catch(...) {
348                 cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }
351 }
352 /**************************************************************************/
353 void FullMatrix::getBounds(int& higher, string group) {
354         try{
355                 bool gotLower = false;
356                 
357                 //for each group find bounds of subgroup/comparison
358                 for (it = index.begin(); it != index.end(); it++) {
359                         if (it->second.groupname == group) {
360                                 gotLower = true; 
361                         }else if ((gotLower == true) && (it->second.groupname != group)) {  higher = it->first; break; }
362                 }
363         
364         }
365         catch(exception& e) {
366                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
367                 exit(1);
368         }
369         catch(...) {
370                 cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
371                 exit(1);
372         }
373
374 }
375
376 /**************************************************************************/
377 //print out matrix
378 void FullMatrix::printMinsForRows(ostream& out) {
379         try{
380                 for (int j = 0; j < minsForRows.size(); j++) {
381                         out << minsForRows[j] << " ";
382                 }
383                 out << endl;
384
385         }
386         catch(exception& e) {
387                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
388                 exit(1);
389         }
390         catch(...) {
391                 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
392                 exit(1);
393         }
394
395 }
396 /**************************************************************************/
397 //shuffles the sequences in the 2 groups passed in.
398 void FullMatrix::shuffle(int box){
399         try{
400                 vector<int> rows2Swap;
401                 vector<int> shuffled;
402                 float y = 0;
403                 string name = "";
404                 
405                 /****************************/
406                 //find the box the user wants
407                 /****************************/
408                 int count = 0;
409                 int lowBoundy = bounds[0]; //where first group starts
410                 int highBoundy = bounds[1]; //where second group starts
411                 int county = 1; //index in bound
412                 
413                 //find the bounds for the box the user wants
414                 for (int i = 0; i < (numGroups * numGroups); i++) {
415                 
416                         //are you at the box?
417                         if (count == box) { break; }
418                         else { count++; }
419                         
420                         //move to next box
421                         if (county < numGroups) {
422                                 county++;
423                                 highBoundy = bounds[county];
424                                 lowBoundy = bounds[county-1];
425                         }else{ //you are moving to a new row of "boxes"
426                                 county = 1;
427                                 highBoundy = bounds[county];
428                                 lowBoundy = bounds[county-1];
429                         }
430                 }
431         
432                 /************************/
433                 //save its rows locations
434                 /************************/
435                 //go through the matrix map to find the rows from groups you want to randomize
436                 for (int y = lowBoundy; y < highBoundy; y++) {
437                         rows2Swap.push_back(y);
438                         shuffled.push_back(y);
439                 }
440                 
441                 //randomize rows to shuffle in shuffled
442                 random_shuffle(shuffled.begin(), shuffled.end());
443                 
444                 /***************************************/
445                 //swap rows and columns to randomize box
446                 /***************************************/
447                 for (int i = 0; i < shuffled.size(); i++) {
448                         //record the swaps you are making so you can undo them in restore function
449                         restoreIndex[i].a = shuffled[i];
450                         restoreIndex[i].b = rows2Swap[i];
451                         
452                         /* swap rows*/
453                         for (int h = 0; h < numSeqs; h++) {
454                                 y = matrix[shuffled[i]][h];
455                                 matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h]; 
456                                 matrix[rows2Swap[i]][h] = y;
457                         }
458                                 
459                         /* swap columns */
460                         for (int b = 0; b < numSeqs; b++) {
461                                 y = matrix[b][shuffled[i]];
462                                 matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]]; 
463                                 matrix[b][rows2Swap[i]] = y;
464                         }
465                                 
466                         //swap map elements
467                         name = index[shuffled[i]].seqName;
468                         index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
469                         index[rows2Swap[i]].seqName = name;
470                 }
471         }
472         catch(exception& e) {
473                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
474                 exit(1);
475         }
476         catch(...) {
477                 cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
478                 exit(1);
479         }
480
481 /**************************************************************************/
482 //unshuffles the matrix.
483 void FullMatrix::restore(){
484         try{
485                 float y = 0;
486                 string name = "";
487
488                 //reverse iterate through swaps and undo them to restore original matrix and index map.
489                 for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
490                         /* swap rows */
491                         for (int h = 0; h < numSeqs; h++) {
492                                 y = matrix[it2->second.a][h];
493                                 matrix[it2->second.a][h] = matrix[it2->second.b][h]; 
494                                 matrix[it2->second.b][h] = y;
495                         }
496                         
497                         /* swap columns */
498                         for (int b = 0; b < numSeqs; b++) {
499                                 y = matrix[b][it2->second.a];
500                                 matrix[b][it2->second.a] = matrix[b][it2->second.b]; 
501                                 matrix[b][it2->second.b] = y;
502                         }
503                         
504                                 
505                         //swap map elements
506                         name = index[it2->second.a].seqName;
507                         index[it2->second.a].seqName = index[it2->second.b].seqName;
508                         index[it2->second.b].seqName = name;
509                 }
510
511                 //clear restore for next shuffle
512                 restoreIndex.clear();
513         }
514         catch(exception& e) {
515                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
516                 exit(1);
517         }
518         catch(...) {
519                 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
520                 exit(1);
521         }
522 }  
523 /**************************************************************************/
524 void FullMatrix::getDist(vector<float>& distances) {
525         try{
526                 map<float, float> dist;  //holds the distances for the integral form
527                 map<float, float>::iterator it;
528
529                 /************************************************************/
530                 //fill the minsForRows vectors for each group the user wants
531                 /************************************************************/
532                 int lowBoundx = bounds[0]; //where first group starts
533                 int lowBoundy = bounds[0]; 
534                 int highBoundx = bounds[1]; //where second group starts
535                 int highBoundy = bounds[1]; 
536                 
537                 int countx = 1;  //index in bound
538                 int county = 1; //index in bound
539                 
540                 //go through each "box" in the matrix
541                 for (int i = 0; i < (numGroups * numGroups); i++) {
542                         //each row in the box
543                         for (int x = lowBoundx; x < highBoundx; x++) {
544                                 float min4Row = 100000.0;
545                                 //each entry in that row
546                                 for (int y = lowBoundy; y < highBoundy; y++) {
547                                         //if you are not on the diagonal and you are less than previous minimum
548                                         if ((x != y) && (matrix[x][y] < min4Row)){
549                                                 min4Row = matrix[x][y];
550                                         }
551                                 }
552                                 //save minimum value 
553                                 dist[min4Row] = min4Row;
554                         }
555                         
556                         //****** reset bounds to process next "box" ********
557                         //if you still have more "boxes" in that row
558                         if (county < numGroups) {
559                                 county++;
560                                 highBoundy = bounds[county];
561                                 lowBoundy = bounds[county-1];
562                         }else{ //you are moving to a new row of "boxes"
563                                 county = 1;
564                                 countx++;
565                                 highBoundx = bounds[countx];
566                                 lowBoundx = bounds[countx-1];
567                                 highBoundy = bounds[county];
568                                 lowBoundy = bounds[county-1];
569                         }
570                 }
571
572                 //store distances in users vector
573                 for (it = dist.begin(); it != dist.end(); it++) {
574                         distances.push_back(it->first);
575                 }
576                 
577         }
578         catch(exception& e) {
579                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
580                 exit(1);
581         }
582         catch(...) {
583                 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
584                 exit(1);
585         }
586 }
587