]> git.donarmstrong.com Git - mothur.git/blob - fullmatrix.cpp
libshuff and updated help to include libshuff and modifications to read.dist.
[mothur.git] / fullmatrix.cpp
1 /*
2  *  fullmatrix.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
10 #include "fullmatrix.h"
11
12 /**************************************************************************/
13 //This constructor reads a distance matrix file and stores the data in the matrix.
14 FullMatrix::FullMatrix(ifstream& filehandle) {
15         try{
16                 globaldata = GlobalData::getInstance();
17                 groupmap = globaldata->gGroupmap;
18                 
19                 string name, group;
20                 filehandle >> numSeqs >> name;
21                 
22                 //make the matrix filled with zeros
23                 matrix.resize(numSeqs); 
24                 for(int i = 0; i < numSeqs; i++) {
25                         matrix[i].resize(numSeqs, 0);
26                 }
27                 
28                 group = groupmap->getGroup(name);
29                 if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
30                 index[0].groupname = group; 
31                 index[0].seqName = name;
32                 
33                 //determine if matrix is square or lower triangle
34                 //if it is square read the distances for the first sequence
35                 char d;
36                 while((d=filehandle.get()) != EOF){
37                         
38                         //is d a number meaning its square
39                         if(isalnum(d)){ 
40                                 square = true;
41                                 filehandle.putback(d);
42                                 
43                                 for(int i=0;i<numSeqs;i++){
44                                         filehandle >> matrix[0][i];
45                                 }
46                                 break;
47                         }
48                         
49                         //is d a line return meaning its lower triangle
50                         if(d == '\n'){
51                                 square = false;
52                                 break;
53                         }
54                 }
55                 
56                 //read rest of matrix
57                 if (square == true) { readSquareMatrix(filehandle); }
58                 else { readLTMatrix(filehandle); }
59                 
60                 //sort sequences so they are gathered in groups for processing
61                 sortGroups(0, numSeqs-1);
62                         
63         }
64         catch(exception& e) {
65                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66                 exit(1);
67         }
68         catch(...) {
69                 cout << "An unknown error has occurred in the FullMatrix class function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }
72 }
73 /**************************************************************************/
74 void FullMatrix::readSquareMatrix(ifstream& filehandle) {
75         try {
76         
77                 Progress* reading;
78                 reading = new Progress("Reading matrix:    ", numSeqs * numSeqs);
79                 
80                 int count = 0;
81                 
82                 string group, name;
83                 
84                 for(int i=1;i<numSeqs;i++){
85                         filehandle >> name;             
86                         
87                         group = groupmap->getGroup(name);
88                         index[i].groupname = group;
89                         index[i].seqName = name;
90                         
91                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
92                                 
93                         for(int j=0;j<numSeqs;j++){
94                                 filehandle >> matrix[i][j];
95                                 
96                                 count++;
97                                 reading->update(count);
98                         }
99                 }
100                 reading->finish();
101                 delete reading;
102         }
103         catch(exception& e) {
104                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
105                 exit(1);
106         }
107         catch(...) {
108                 cout << "An unknown error has occurred in the FullMatrix class function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
109                 exit(1);
110         }
111
112
113 /**************************************************************************/
114 void FullMatrix::readLTMatrix(ifstream& filehandle) {
115         try {
116                 Progress* reading;
117                 reading = new Progress("Reading matrix:    ", numSeqs * (numSeqs - 1) / 2);
118                 
119                 int count = 0;
120                 float distance;
121
122                 string group, name;
123                 
124                 for(int i=1;i<numSeqs;i++){
125                         filehandle >> name;             
126                                                 
127                         group = groupmap->getGroup(name);
128                         index[i].groupname = group;
129                         index[i].seqName = name;
130         
131                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl;  exit(1); }
132                                 
133                         for(int j=0;j<i;j++){
134                                 filehandle >> distance;
135                                         
136                                 matrix[i][j] = distance;  matrix[j][i] = distance;
137                                 count++;
138                                 reading->update(count);
139                         }
140                         
141                 }
142                 reading->finish();
143                 delete reading;
144         }
145         catch(exception& e) {
146                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
147                 exit(1);
148         }
149         catch(...) {
150                 cout << "An unknown error has occurred in the FullMatrix class function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
151                 exit(1);
152         }
153
154 }
155
156 /**************************************************************************/
157 void FullMatrix::sortGroups(int low, int high){
158         try{
159         
160                 int i = low;
161                 int j = high;
162                 float y = 0;
163                 string name;
164                 
165                 /* compare value */
166                 //what group does this row belong to
167                 string z = index[(low + high) / 2].groupname;
168
169                 /* partition */
170                 do {
171                         /* find member above ... */
172                         while(index[i].groupname < z) i++;
173
174                         /* find element below ... */
175                         while(index[j].groupname > z) j--;
176                         
177                         if(i <= j) {
178                                 /* swap rows*/
179                                 for (int h = 0; h < numSeqs; h++) {
180                                         y = matrix[i][h];
181                                         matrix[i][h] = matrix[j][h]; 
182                                         matrix[j][h] = y;
183                                 }
184                                 
185                                 /* swap columns*/
186                                 for (int b = 0; b < numSeqs; b++) {
187                                         y = matrix[b][i];
188                                         matrix[b][i] = matrix[b][j]; 
189                                         matrix[b][j] = y;
190                                 }
191                                 
192                                 //swap map elements
193                                 z = index[i].groupname;
194                                 index[i].groupname = index[j].groupname;
195                                 index[j].groupname = z;
196                                 
197                                 name = index[i].seqName;
198                                 index[i].seqName = index[j].seqName;
199                                 index[j].seqName = name;
200
201                                 
202                                 i++; 
203                                 j--;
204                         }
205                 } while(i <= j);
206
207                 /* recurse */
208                 if(low < j) 
209                 sortGroups(low, j);
210
211                 if(i < high) 
212                 sortGroups(i, high); 
213
214         
215         }
216         catch(exception& e) {
217                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
218                 exit(1);
219         }
220         catch(...) {
221                 cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
222                 exit(1);
223         }
224
225 }
226
227 /**************************************************************************/    
228 int FullMatrix::getNumSeqs(){ return numSeqs; }
229 /**************************************************************************/
230 //print out matrix
231 void FullMatrix::printMatrix(ostream& out) {
232         try{
233                 for (int i = 0; i < numSeqs; i++) {
234                         out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
235                         for (int j = 0; j < numSeqs; j++) {
236                                 out << matrix[i][j] << " ";
237                         }
238                         out << endl;
239                 }
240         }
241         catch(exception& e) {
242                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
243                 exit(1);
244         }
245         catch(...) {
246                 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
247                 exit(1);
248         }
249
250 }
251
252 /**************************************************************************/
253 void FullMatrix::setBounds(){
254         try{
255                 numGroups = globaldata->gGroupmap->namesOfGroups.size();
256                 
257                 //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
258                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
259                 
260                 //one for each comparision
261                 //minsForRows.resize(numGroups*numGroups);
262                 
263                 /*************************************************/
264                 //find where in matrix each group starts and stops
265                 /*************************************************/
266                 bounds.resize(numGroups);
267                 
268                 bounds[0] = 0;
269                 bounds[numGroups] = numSeqs;
270
271                 //for each group find bounds of subgroup/comparison
272                 for (int i = 1; i < numGroups; i++) {
273                         getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i-1]);
274                 }
275                 
276         }
277         catch(exception& e) {
278                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
279                 exit(1);
280         }
281         catch(...) {
282                 cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
283                 exit(1);
284         }
285
286 }
287 /**************************************************************************/
288 vector<float> FullMatrix::getMins(int x) {
289         try{    
290                 //clear out old data
291                 minsForRows.clear();
292                 
293                 /************************************************************/
294                 //fill the minsForRows vector for the box the user wants
295                 /************************************************************/
296                 int count = 0;
297                 int lowBoundx = bounds[0]; //where first group starts
298                 int lowBoundy = bounds[0]; 
299                 int highBoundx = bounds[1]; //where second group starts
300                 int highBoundy = bounds[1]; 
301                 
302                 int countx = 1;  //index in bound
303                 int county = 1; //index in bound
304                 
305                 //find the bounds for the box the user wants
306                 for (int i = 0; i < (numGroups * numGroups); i++) {
307                 
308                         //are you at the box?
309                         if (count == x) { break; }
310                         else { count++; }
311                         
312                         //move to next box
313                         if (county < numGroups) {
314                                 county++;
315                                 highBoundy = bounds[county];
316                                 lowBoundy = bounds[county-1];
317                         }else{ //you are moving to a new row of "boxes"
318                                 county = 1;
319                                 countx++;
320                                 highBoundx = bounds[countx];
321                                 lowBoundx = bounds[countx-1];
322                                 highBoundy = bounds[county];
323                                 lowBoundy = bounds[county-1];
324                         }
325                 }
326                                 
327                 //each row in the box
328                 for (int x = lowBoundx; x < highBoundx; x++) {
329                         float min4Row = 100000.0;
330                         //each entry in that row
331                         for (int y = lowBoundy; y < highBoundy; y++) {
332                                 //if you are not on the diagonal and you are less than previous minimum
333                                 if ((x != y) && (matrix[x][y] < min4Row)) {
334                                         min4Row = matrix[x][y];
335                                 }
336                         }
337                         //save minimum value for that row in minsForRows vector of vectors
338                         minsForRows.push_back(min4Row);
339                 }
340                         
341                 return minsForRows;
342         }
343         catch(exception& e) {
344                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
345                 exit(1);
346         }
347         catch(...) {
348                 cout << "An unknown error has occurred in the FullMatrix class function getMins. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
349                 exit(1);
350         }
351 }
352 /**************************************************************************/
353 void FullMatrix::getBounds(int& higher, string group) {
354         try{
355                 bool gotLower = false;
356                 
357                 //for each group find bounds of subgroup/comparison
358                 for (it = index.begin(); it != index.end(); it++) {
359                         if (it->second.groupname == group) {
360                                 gotLower = true; 
361                         }else if ((gotLower == true) && (it->second.groupname != group)) {  higher = it->first; break; }
362                 }
363         
364         }
365         catch(exception& e) {
366                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
367                 exit(1);
368         }
369         catch(...) {
370                 cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
371                 exit(1);
372         }
373
374 }
375
376 /**************************************************************************/
377 //print out matrix
378 void FullMatrix::printMinsForRows(ostream& out) {
379         try{
380                 for (int j = 0; j < minsForRows.size(); j++) {
381                         out << minsForRows[j] << " ";
382                 }
383                 out << endl;
384
385         }
386         catch(exception& e) {
387                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
388                 exit(1);
389         }
390         catch(...) {
391                 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
392                 exit(1);
393         }
394
395 }
396
397 /**************************************************************************/
398 //shuffles the sequences in the 2 groups passed in.
399 void FullMatrix::shuffle(string groupA, string groupB){
400         try{
401                 vector<int> rows2Swap;
402                 vector<int> shuffled;
403                 float y = 0;
404                 string name = "";
405                 
406                         
407                 /********************************/
408                 //save rows you want to randomize
409                 /********************************/
410                 //go through the matrix map to find the rows from groups you want to randomize
411                 for (it = index.begin(); it != index.end(); it++) {
412                         //is this row from group A or B?
413                         if ((it->second.groupname == groupA) || (it->second.groupname == groupB)) {
414                                 rows2Swap.push_back(it->first);
415                                 shuffled.push_back(it->first);
416                         }
417                 }
418                 
419                 //randomize rows to shuffle in shuffled
420                 random_shuffle(shuffled.begin(), shuffled.end());
421                 
422                 /***************************************/
423                 //swap rows and columns to randomize box
424                 /***************************************/
425                 for (int i = 0; i < shuffled.size(); i++) {
426
427                         //record the swaps you are making so you can undo them in restore function
428                         restoreIndex[i].a = shuffled[i];
429                         restoreIndex[i].b = rows2Swap[i];
430                         
431                         /* swap rows*/
432                         for (int h = 0; h < numSeqs; h++) {
433                                 y = matrix[shuffled[i]][h];
434                                 matrix[shuffled[i]][h] = matrix[rows2Swap[i]][h]; 
435                                 matrix[rows2Swap[i]][h] = y;
436                         }
437                                 
438                         /* swap columns */
439                         for (int b = 0; b < numSeqs; b++) {
440                                 y = matrix[b][shuffled[i]];
441                                 matrix[b][shuffled[i]] = matrix[b][rows2Swap[i]]; 
442                                 matrix[b][rows2Swap[i]] = y;
443                         }
444                                 
445                         //swap map elements
446                         name = index[shuffled[i]].seqName;
447                         index[shuffled[i]].seqName = index[rows2Swap[i]].seqName;
448                         index[rows2Swap[i]].seqName = name;
449
450                 }
451         }
452         catch(exception& e) {
453                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
454                 exit(1);
455         }
456         catch(...) {
457                 cout << "An unknown error has occurred in the FullMatrix class function shuffle. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
458                 exit(1);
459         }
460
461 /**************************************************************************/
462 //unshuffles the matrix.
463 void FullMatrix::restore(){
464         try{
465                 float y = 0;
466                 string name = "";
467
468                 //reverse iterate through swaps and undo them to restore original matrix and index map.
469                 for(it2 = restoreIndex.rbegin(); it2 != restoreIndex.rend(); it2++) {
470                         /* swap rows */
471
472                         for (int h = 0; h < numSeqs; h++) {
473                                 y = matrix[it2->second.a][h];
474                                 matrix[it2->second.a][h] = matrix[it2->second.b][h]; 
475                                 matrix[it2->second.b][h] = y;
476                         }
477                         
478                         /* swap columns */
479                         for (int b = 0; b < numSeqs; b++) {
480                                 y = matrix[b][it2->second.a];
481                                 matrix[b][it2->second.a] = matrix[b][it2->second.b]; 
482                                 matrix[b][it2->second.b] = y;
483                         }
484                         
485                                 
486                         //swap map elements
487                         name = index[it2->second.a].seqName;
488                         index[it2->second.a].seqName = index[it2->second.b].seqName;
489                         index[it2->second.b].seqName = name;
490
491                 }
492
493                 //clear restore for next shuffle
494                 restoreIndex.clear();
495         }
496         catch(exception& e) {
497                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
498                 exit(1);
499         }
500         catch(...) {
501                 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
502                 exit(1);
503         }
504 }  
505 /**************************************************************************/
506 void FullMatrix::getDist(vector<float>& distances) {
507         try{
508                 map<float, float> dist;  //holds the distances for the integral form
509                 map<float, float>::iterator it;
510
511                 /************************************************************/
512                 //fill the minsForRows vectors for each group the user wants
513                 /************************************************************/
514                 int lowBoundx = bounds[0]; //where first group starts
515                 int lowBoundy = bounds[0]; 
516                 int highBoundx = bounds[1]; //where second group starts
517                 int highBoundy = bounds[1]; 
518                 
519                 int countx = 1;  //index in bound
520                 int county = 1; //index in bound
521                 
522                 //go through each "box" in the matrix
523                 for (int i = 0; i < (numGroups * numGroups); i++) {
524                         //each row in the box
525                         for (int x = lowBoundx; x < highBoundx; x++) {
526                                 float min4Row = 100000.0;
527                                 //each entry in that row
528                                 for (int y = lowBoundy; y < highBoundy; y++) {
529                                         //if you are not on the diagonal and you are less than previous minimum
530                                         if ((x != y) && (matrix[x][y] < min4Row)){
531                                                 min4Row = matrix[x][y];
532                                         }
533                                 }
534                                 //save minimum value 
535                                 dist[min4Row] = min4Row;
536                         }
537                         
538                         //****** reset bounds to process next "box" ********
539                         //if you still have more "boxes" in that row
540                         if (county < numGroups) {
541                                 county++;
542                                 highBoundy = bounds[county];
543                                 lowBoundy = bounds[county-1];
544                         }else{ //you are moving to a new row of "boxes"
545                                 county = 1;
546                                 countx++;
547                                 highBoundx = bounds[countx];
548                                 lowBoundx = bounds[countx-1];
549                                 highBoundy = bounds[county];
550                                 lowBoundy = bounds[county-1];
551                         }
552                 }
553
554                 //store distances in users vector
555                 for (it = dist.begin(); it != dist.end(); it++) {
556                         distances.push_back(it->first);
557                 }
558                 
559         }
560         catch(exception& e) {
561                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
562                 exit(1);
563         }
564         catch(...) {
565                 cout << "An unknown error has occurred in the FullMatrix class function restore. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
566                 exit(1);
567         }
568 }
569