]> git.donarmstrong.com Git - mothur.git/blob - fullmatrix.cpp
added nseqs and sharednseqs calculators, removed excess tabs in output files.
[mothur.git] / fullmatrix.cpp
1 /*
2  *  fullmatrix.cpp
3  *  Mothur
4  *
5  *  Created by Sarah Westcott on 3/6/09.
6  *  Copyright 2009 Schloss Lab UMASS Amherst. All rights reserved.
7  *
8  */
9
#include "fullmatrix.h"
#include <algorithm>
11
12 /**************************************************************************/
13 //This constructor reads a distance matrix file and stores the data in the matrix.
14 FullMatrix::FullMatrix(ifstream& filehandle) {
15         try{
16                 globaldata = GlobalData::getInstance();
17                 groupmap = globaldata->gGroupmap;
18                 
19                 string name, group;
20                 filehandle >> numSeqs >> name;
21                 
22                 //make the matrix filled with zeros
23                 matrix.resize(numSeqs); 
24                 for(int i = 0; i < numSeqs; i++) {
25                         matrix[i].resize(numSeqs, 0);
26                 }
27                 
28                 group = groupmap->getGroup(name);
29                 if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
30                 index[0].groupname = group; 
31                 index[0].seqName = name;
32                 
33                 //determine if matrix is square or lower triangle
34                 //if it is square read the distances for the first sequence
35                 char d;
36                 while((d=filehandle.get()) != EOF){
37                         
38                         //is d a number meaning its square
39                         if(isalnum(d)){ 
40                                 square = true;
41                                 filehandle.putback(d);
42                                 
43                                 for(int i=0;i<numSeqs;i++){
44                                         filehandle >> matrix[0][i];
45                                 }
46                                 break;
47                         }
48                         
49                         //is d a line return meaning its lower triangle
50                         if(d == '\n'){
51                                 square = false;
52                                 break;
53                         }
54                 }
55                 
56                 //read rest of matrix
57                 if (square == true) { readSquareMatrix(filehandle); }
58                 else { readLTMatrix(filehandle); }
59                 
60                 //sort sequences so they are gathered in groups for processing
61                 sortGroups(0, numSeqs-1);
62                         
63         }
64         catch(exception& e) {
65                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
66                 exit(1);
67         }
68         catch(...) {
69                 cout << "An unknown error has occurred in the FullMatrix class function FullMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
70                 exit(1);
71         }
72 }
73 /**************************************************************************/
74 void FullMatrix::readSquareMatrix(ifstream& filehandle) {
75         try {
76         
77                 Progress* reading;
78                 reading = new Progress("Reading matrix:    ", numSeqs * numSeqs);
79                 
80                 int count = 0;
81                 float distance;
82                 
83                 string group, name;
84                 
85                 for(int i=1;i<numSeqs;i++){
86                         filehandle >> name;             
87                         
88                         group = groupmap->getGroup(name);
89                         index[i].groupname = group;
90                         index[i].seqName = name;
91                         
92                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl; exit(1); }
93                                 
94                         for(int j=0;j<numSeqs;j++){
95                                 filehandle >> distance;
96                                         
97                                 matrix[i][j] = distance;
98                                 count++;
99                                 reading->update(count);
100                         }
101                 }
102                 reading->finish();
103                 delete reading;
104         }
105         catch(exception& e) {
106                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
107                 exit(1);
108         }
109         catch(...) {
110                 cout << "An unknown error has occurred in the FullMatrix class function readSquareMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
111                 exit(1);
112         }
113
114
115 /**************************************************************************/
116 void FullMatrix::readLTMatrix(ifstream& filehandle) {
117         try {
118                 Progress* reading;
119                 reading = new Progress("Reading matrix:    ", numSeqs * (numSeqs - 1) / 2);
120                 
121                 int count = 0;
122                 float distance;
123
124                 string group, name;
125                 
126                 for(int i=1;i<numSeqs;i++){
127                         filehandle >> name;             
128                                                 
129                         group = groupmap->getGroup(name);
130                         index[i].groupname = group;
131                         index[i].seqName = name;
132         
133                         if(group == "not found") {      cout << "Error: Sequence '" << name << "' was not found in the group file, please correct." << endl;  exit(1); }
134                                 
135                         for(int j=0;j<i;j++){
136                                 filehandle >> distance;
137                                         
138                                 matrix[i][j] = distance;  matrix[j][i] = distance;
139                                 count++;
140                                 reading->update(count);
141                         }
142                         
143                 }
144                 reading->finish();
145                 delete reading;
146         }
147         catch(exception& e) {
148                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
149                 exit(1);
150         }
151         catch(...) {
152                 cout << "An unknown error has occurred in the FullMatrix class function readLTMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
153                 exit(1);
154         }
155
156 }
157
158 /**************************************************************************/
159 void FullMatrix::sortGroups(int low, int high){
160         try{
161         
162                 int i = low;
163                 int j = high;
164                 int y = 0;
165                 string name;
166                 
167                 /* compare value */
168                 //what group does this row belong to
169                 string z = index[(low + high) / 2].groupname;
170
171                 /* partition */
172                 do {
173                         /* find member above ... */
174                         while(index[i].groupname < z) i++;
175
176                         /* find element below ... */
177                         while(index[j].groupname > z) j--;
178                         
179                         if(i <= j) {
180                                 /* swap rows*/
181                                 for (int h = 0; h < numSeqs; h++) {
182                                         y = matrix[i][h];
183                                         matrix[i][h] = matrix[j][h]; 
184                                         matrix[j][h] = y;
185                                 }
186                                 
187                                 /* swap columns*/
188                                 for (int b = 0; b < numSeqs; b++) {
189                                         y = matrix[b][i];
190                                         matrix[b][i] = matrix[b][j]; 
191                                         matrix[b][j] = y;
192                                 }
193                                 
194                                 //swap map elements
195                                 z = index[i].groupname;
196                                 index[i].groupname = index[j].groupname;
197                                 index[j].groupname = z;
198                                 
199                                 name = index[i].seqName;
200                                 index[i].seqName = index[j].seqName;
201                                 index[j].seqName = name;
202
203                                 
204                                 i++; 
205                                 j--;
206                         }
207                 } while(i <= j);
208
209                 /* recurse */
210                 if(low < j) 
211                 sortGroups(low, j);
212
213                 if(i < high) 
214                 sortGroups(i, high); 
215
216         
217         }
218         catch(exception& e) {
219                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
220                 exit(1);
221         }
222         catch(...) {
223                 cout << "An unknown error has occurred in the FullMatrix class function sortGroups. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
224                 exit(1);
225         }
226
227 }
228
229 /**************************************************************************/    
230 int FullMatrix::getNumSeqs(){ return numSeqs; }
231 /**************************************************************************/
232 //print out matrix
233 void FullMatrix::printMatrix(ostream& out) {
234         try{
235                 for (int i = 0; i < numSeqs; i++) {
236                         out << "row " << i << " group = " << index[i].groupname << " name = " << index[i].seqName << endl;
237                         for (int j = 0; j < numSeqs; j++) {
238                                 out << matrix[i][j] << " ";
239                         }
240                         out << endl;
241                 }
242         }
243         catch(exception& e) {
244                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
245                 exit(1);
246         }
247         catch(...) {
248                 cout << "An unknown error has occurred in the FullMatrix class function printMatrix. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
249                 exit(1);
250         }
251
252 }
253
254 /**************************************************************************/
255 void FullMatrix::getMinsForRowsVectors(){
256         try{
257                 numGroups = globaldata->gGroupmap->namesOfGroups.size();
258                 
259                 //sort globaldata->gGroupmap.namesOfGroups so that it will match the matrix
260                 sort(globaldata->gGroupmap->namesOfGroups.begin(), globaldata->gGroupmap->namesOfGroups.end());
261                 
262                 /*************************************************/
263                 //find where in matrix each group starts and stops
264                 /*************************************************/
265                 vector<int> bounds;  //bounds[1] = starting row in matrix from group B, bounds[2] = starting row in matrix from group C, bounds[3] = no need to find upper bound of C because its numSeqs.
266                 bounds.resize(numGroups);
267                 
268                 bounds[0] = 0;
269                 bounds[numGroups] = numSeqs-1;
270                 //for each group find bounds of subgroup/comparison
271                 for (int i = 1; i < numGroups; i++) {
272                         getBounds(bounds[i], globaldata->gGroupmap->namesOfGroups[i]);
273                 }
274                 
275                 /************************************************************/
276                 //fill the minsForRows vectors for each group the user wants
277                 /************************************************************/
278                 int countx = bounds[1]; //where second group starts
279                 int county = bounds[1]; 
280                 
281                 //go through the entire matrix
282                 for (int x = 0; x < numSeqs; x++) {
283                         for (int y = 0; y < numSeqs; y++) {
284                                 //if have not changed groups
285                                 if ((x < countx) && (y < county)) {
286                                         
287                                 }
288                         }
289                 }
290                                         
291                                 
292                         
293         
294         }
295         catch(exception& e) {
296                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
297                 exit(1);
298         }
299         catch(...) {
300                 cout << "An unknown error has occurred in the FullMatrix class function getMinsForRowsVectors. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
301                 exit(1);
302         }
303
304 }
305
306 /**************************************************************************/
307 void FullMatrix::getBounds(int& higher, string group) {
308         try{
309                 bool gotLower = false;
310                 
311                 //for each group find bounds of subgroup/comparison
312                 for (it = index.begin(); it != index.end(); it++) {
313                         if (it->second.groupname == group) {
314                                 if (gotLower != true) { gotLower = true; }
315                         }else if ((gotLower == true) && (it->second.groupname != group)) {  higher = it->first; break; }
316                 }
317         
318         }
319         catch(exception& e) {
320                 cout << "Standard Error: " << e.what() << " has occurred in the FullMatrix class Function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
321                 exit(1);
322         }
323         catch(...) {
324                 cout << "An unknown error has occurred in the FullMatrix class function getBounds. Please contact Pat Schloss at pschloss@microbio.umass.edu." << "\n";
325                 exit(1);
326         }
327
328 }
329