]> git.donarmstrong.com Git - mothur.git/blob - mothurmetastats.cpp
1.21.0
[mothur.git] / mothurmetastats.cpp
1 /*
2  *  mothurmetastats.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/6/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mothurmetastats.h"
11 #include "mothurfisher.h"
12
13 /***********************************************************/
14 MothurMetastats::MothurMetastats(double t, int n) {
15         try {
16                 m = MothurOut::getInstance(); 
17                 threshold = t;
18                 numPermutations = n;
19                 
20         }catch(exception& e) {
21                 m->errorOut(e, "MothurMetastats", "MothurMetastats");
22                 exit(1);
23         }       
24 }
25 /***********************************************************/
26 MothurMetastats::~MothurMetastats() {}
27 /***********************************************************/
28 //main metastats function
29 int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
30         try {
31                 int bflag = 0;
32                 row = data.size();               //numBins
33                 column = data[0].size(); //numGroups in subset
34                 int size = row*column;
35                 
36                 //consistent with original, but this should never be true
37                 if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
38                 
39                 //Initialize the matrices
40                 vector<double> pmatrix; pmatrix.resize(size, 0.0);
41                 vector<double> permuted; permuted.resize(size, 0.0);
42                 vector< vector<double> > storage; storage.resize(row);
43                 for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
44                 
45                 //Produces the sum of each column
46                 vector<double> total; total.resize(column, 0.0);
47                 vector<double> ratio; ratio.resize(column, 0.0);
48                 double total1 = 0.0; double total2 = 0.0;
49                 
50                 //total[i] = total abundance for group[i]
51                 for (int i = 0; i < column; i++) {
52                         for (int j = 0; j < row; j++) {
53                                 total[i] += data[j][i];
54                         }
55                 }
56                 
57                 //total for first grouping
58                 for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; }
59                 
60                 //total for second grouping
61                 for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; }
62                 
63                 //Creates the ratios by first finding the minimum of totals
64                 double min = total[0];
65                 for (int i = 0; i < total.size(); i++) {
66                          if (total[i] < min) { min = total[i]; }
67                 }
68                 
69                 //sanity check
70                 if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
71                 
72                 //Ratio time...
73                 for(int i = 0; i < ratio.size(); i++){  ratio[i] = total[i] / min; }
74                 
75                 //Change matrix into an array as received by R for compatibility - kept to be consistent with original
76                 int count = 0;
77                 for(int i = 0; i < column; i++){
78                         for(int j = 0; j < row; j++){
79                                 pmatrix[count]=data[j][i];
80                                 count++;
81                         }
82                 }
83                 
84                 if(row == 1){
85                         for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
86                 }else {
87                         count = 0; int j=-1;
88                         
89                         for (int i=0; i < size; i++) {
90                                 if (count % row == 0) { j++; }
91                                 pmatrix[i] /= ratio[j];
92                                 count++; 
93                         }   
94                 }
95                 
96                 vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
97                 vector<double> pvalues;                 pvalues.resize(row, 0.0);
98                 vector<double> tinitial;                tinitial.resize(row, 0.0);
99                 
100                 if (m->control_pressed) { return 1; }
101                 
102                 //Find the initial values for the matrix.
103                 start(pmatrix, secondGroupingStart, tinitial, storage);
104                 
105                 if (m->control_pressed) { return 1; }
106                 
107                 // Start the calculations.
108                 if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ 
109                         
110                         vector<double> fish;    fish.resize(row, 0.0);
111                         vector<double> fish2;   fish2.resize(row, 0.0);
112                         
113                         for(int i = 0; i < row; i++){
114                                 
115                                 for(int j = 0; j < (secondGroupingStart-1); j++)                { fish[i] += data[i][j];        }
116                                 for(int j = (secondGroupingStart-1); j < column; j++)   { fish2[i] += data[i][j];       }
117                                 
118                                 //vector<double> tempData; tempData.resize(4, 0.0);
119                                 double f11, f12, f21, f22;
120                                 f11 = fish[i];
121                                 f12 = fish2[i];
122                                 f21 = total1 - fish[i];
123                                 f22 = total2 - fish2[i];
124                                 
125                                 double pre = 0.0;
126                                 
127                                 MothurFisher fisher;
128                                 pre = fisher.fexact(f11, f12, f21, f22);
129                                 
130                                 if (m->control_pressed) { return 1; }
131                                 
132                                 if (pre > 0.999999999)  { pre = 1.0; }
133                                 storage[i][8] = pre;
134                                 pvalues[i] = pre;
135                         }
136                         
137                 }else {
138         
139                         testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
140                         
141                         if (m->control_pressed) { return 1; }
142                         
143                         // Checks to make sure the matrix isn't sparse.
144                         vector<double> sparse;          sparse.resize(row, 0.0);
145                         vector<double> sparse2;         sparse2.resize(row, 0.0);
146                         
147                         int c = 0;
148                         
149                         for(int i = 0; i < row; i++){
150                                 
151                                 for(int j = 0; j < (secondGroupingStart-1); j++){       sparse[i] += data[i][j]; }
152                                 if(sparse[i] < (double)(secondGroupingStart-1)){        c++; }
153                                 
154                                 // ?<= for col
155                                 for(int j = (secondGroupingStart-1); j < column; j++){  sparse2[i] += data[i][j]; }
156                                 if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; }
157                                 
158                                 if (c == 2) {
159                                         c=0;
160                                         double f11,f12,f21,f22;
161                                         
162                                         f11=sparse[i];  sparse[i]=0;
163                                         f12=sparse2[i];  sparse2[i]=0;
164                                         f21 = total1 - f11;
165                                         f22 = total2 - f12;
166                                         
167                                         double pre = 0.0;
168                                         
169                                         MothurFisher fisher;
170                                         pre = fisher.fexact(f11, f12, f21, f22);
171                                         
172                                         if (m->control_pressed) { return 1; }
173                                         
174                                         if (pre > 0.999999999){
175                                                 pre = 1.0;
176                                         }
177                                         
178                                         storage[i][8] = pre;
179                                         pvalues[i] = pre;
180                                 }                               
181                         }
182                         
183                         bflag = 1;
184                 }
185
186                 // Calculates the mean of counts (not normalized)
187                 vector< vector<double> > temp; temp.resize(row);
188                 for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
189                 
190                 for (int j = 0; j < row; j++){
191                         if (m->control_pressed) { return 1; }
192                         
193                         for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; }
194                         temp[j][0] /= (double)(secondGroupingStart-1);
195                         
196                         for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; }
197                         temp[j][1] /= (double)(column-secondGroupingStart+1);
198                 }
199                 
200                 for(int i = 0; i < row; i++){
201                         if (m->control_pressed) { return 1; }
202                         
203                         storage[i][3]=temp[i][0];
204                         storage[i][7]=temp[i][1];
205                         storage[i][8]=pvalues[i];
206                 }
207                 
208                 // BACKUP checks
209                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
210                 for (int i = 0; i < row; i++){
211                         
212                         if (m->control_pressed) { return 1; }
213                         
214                         if(pvalues[i] < threshold){
215                                 m->mothurOut("Feature " + toString((i+1)) + " is significant, p = "); 
216                                 cout << pvalues[i];
217                                 m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
218                         }       
219                 }
220                 
221                 // And now we write the files to a text file.
222                 struct tm *local;
223                 time_t t; t = time(NULL);
224                 local = localtime(&t);
225                 
226                 ofstream out;
227                 m->openOutputFile(outputFileName, out);
228                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
229                                                   
230                 out << "Local time and date of test: " << asctime(local) << endl;
231                 out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
232                 if (bflag == 1){ out << numPermutations << " permutations" << endl << endl;     }
233                 
234                 //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
235                 //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
236                 out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n";
237                 
238                 for(int i = 0; i < row; i++){
239                         if (m->control_pressed) { out.close(); return 0; }
240                         
241                         out << (i+1);
242                         for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
243                         out << endl;
244                 }  
245                 
246                 out << endl << endl;
247                 out.close();
248                 
249                 return 0;
250         
251         }catch(exception& e) {
252                 m->errorOut(e, "MothurMetastats", "runMetastats");
253                 exit(1);
254         }       
255 }
256 /***********************************************************/
257 //Find the initial values for the matrix
258 int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
259         try {
260                 
261                 int a = row; a*=4;
262                 
263                 double xbardiff = 0.0; double denom = 0.0;
264                 vector<double> store;   store.resize(a, 0.0);
265                 vector<double> tool;    tool.resize(a, 0.0);
266                 vector< vector<double> > C1; C1.resize(row);
267                 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
268                 vector< vector<double> > C2; C2.resize(row);
269                 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
270                 
271                 meanvar(Imatrix, secondGroupingStart, store);
272                 
273                 if (m->control_pressed) { return 0; }
274                 
275                 //copy store into tool
276                 tool = store;
277                 
278                 for (int i = 0; i < row; i++){
279                         C1[i][0]=tool[i]; //mean group 1
280                         storage[i][0]=C1[i][0];
281                         C1[i][1]=tool[i+row+row]; // var group 1
282                         storage[i][1]=C1[i][1];
283                         C1[i][2]=C1[i][1]/(secondGroupingStart-1);
284                         storage[i][2]=sqrt(C1[i][2]);
285                         
286                         C2[i][0]=tool[i+row]; // mean group 2
287                         storage[i][4]=C2[i][0];    
288                         C2[i][1]=tool[i+row+row+row]; // var group 2 
289                         storage[i][5]=C2[i][1];        
290                         C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
291                         storage[i][6]=sqrt(C2[i][2]);   
292                 }
293                 
294                 if (m->control_pressed) { return 0; }
295                 
296                 for (int i = 0; i < row; i++){
297                         xbardiff = C1[i][0]-C2[i][0];
298                         denom = sqrt(C1[i][2]+C2[i][2]);
299                         initial[i]=fabs(xbardiff/denom);
300                 }       
301
302                 return 0; 
303                 
304         }catch(exception& e) {
305                 m->errorOut(e, "MothurMetastats", "start");
306                 exit(1);
307         }       
308 }
309 /***********************************************************/
310 int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
311         try {
312                 vector<double> temp;    temp.resize(row, 0.0);
313                 vector<double> temp2;   temp2.resize(row, 0.0);
314                 vector<double> var;             var.resize(row, 0.0);
315                 vector<double> var2;    var2.resize(row, 0.0);
316                 
317                 double a = secondGroupingStart-1;
318                 double b = column - a;
319                 int m = a * row;
320                 int n = row * column;
321                 
322                 for (int i = 0; i < m; i++)             { temp[i%row] += pmatrix[i];    }
323                 for (int i = 0; i < n; i++)             { temp2[i%row]+= pmatrix[i];    }
324                 for (int i = 0; i < row; i++)   { temp2[i] -= temp[i];          }
325                 for (int i = 0; i <= row-1;i++) {
326                         store[i] = temp[i]/a;
327                         store[i+row]=temp2[i]/b;
328                 }
329                 
330                 //That completes the mean calculations.
331                 
332                 for (int i = 0; i < m; i++)             { var[i%row] += pow((pmatrix[i]-store[i%row]),2);               }
333                 for (int i = m; i < n; i++)             { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
334                 for (int i = 0; i <= row-1; i++){
335                         store[i+2*row]=var[i]/(a-1);
336                         store[i+3*row]=var2[i]/(b-1);
337                 }
338                 
339                 // That completes var calculations.
340                 
341                 return 0;
342                 
343         }catch(exception& e) {
344                 m->errorOut(e, "MothurMetastats", "meanvar");
345                 exit(1);
346         }       
347 }
348 /***********************************************************/
349 int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
350         try {
351                 
352                 vector<double> Tvalues;         Tvalues.resize(row, 0.0);
353                 vector<double> counter;         counter.resize(row, 0.0);
354                 int a, b, n;
355                 
356                 a = numPermutations;
357                 b = row;
358                 n = a*b;
359                 
360                 for (int j = 1; j <= row; j++)  {       
361                         if (m->control_pressed) { return 0; }
362                         permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);     
363                 }
364                 
365                 for(int j = 0; j < row; j++)    {       
366                         if (m->control_pressed) { return 0; }
367                         ps[j] = ((counter[j]+1)/(double)(a+1)); 
368                 }
369                 
370                 return 0;
371                 
372         }catch(exception& e) {
373                 m->errorOut(e, "MothurMetastats", "testp");
374                 exit(1);
375         }       
376 }       
377 /***********************************************************/
378 int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
379         try {
380         
381                 vector<int> y; y.resize(column, 0);
382                 for (int i = 1; i <= column; i++){ y[i-1] = i; }
383                 
384                 permute_array(y); 
385                 
386                 int f = 0; int c = 0; int k = 0;
387                 for (int i = 0; i < column; i++){
388                         
389                         if (m->control_pressed) { return 0; }
390                         
391                         f = y[i]; //column number
392                         c = 1;
393                         c *= (f-1);
394                         c *= row;
395                         if (f == 1){ c = 0; } // starting value position in the Imatrix
396                         
397                         for(int j = 1; j <= row; j++){
398                                 permuted[k] = Imatrix[c];
399                                 c++; k++;
400                         }
401                 }
402                 
403                 calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
404                 
405                 return 0;
406                 
407         }catch(exception& e) {
408                 m->errorOut(e, "MothurMetastats", "permute_matrix");
409                 exit(1);
410         }       
411 }
412 /***********************************************************/
413 int MothurMetastats::permute_array(vector<int>& array) {
414         try {
415                 static int seeded = 0;
416                 
417                 if (! seeded) {
418                         seeded = 1;
419                         srand(time(NULL));
420                 }
421                 
422                 for (int i = 0; i < array.size(); i++) {
423                         if (m->control_pressed) { return 0; }
424                         
425                         int selection = rand() % (array.size() - i);
426                         int tmp = array[i + selection];
427                         array[i + selection] = array[i];
428                         array[i] = tmp;
429                 }
430                 
431                 return 0;
432                 
433         }catch(exception& e) {
434                 m->errorOut(e, "MothurMetastats", "permute_array");
435                 exit(1);
436         }       
437 }
438 /***********************************************************/
439 int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
440         try {
441                 int a = row * 4;
442                 
443                 vector< vector<double> > C1; C1.resize(row);
444                 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
445                 vector< vector<double> > C2; C2.resize(row);
446                 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
447                 vector<double> storage; storage.resize(a, 0.0);
448                 vector<double> tool;    tool.resize(a, 0.0);
449                 double xbardiff = 0.0; double denom = 0.0;
450                 
451                 meanvar(Pmatrix, secondGroupingStart, storage);
452                 
453                 for(int i = 0;i <= (a-1); i++) {        
454                         if (m->control_pressed) { return 0; }
455                         tool[i] = storage[i];   
456                 }
457                 
458                 for (int i = 0; i < row; i++){
459                         if (m->control_pressed) { return 0; }
460                         C1[i][0]=tool[i];
461                         C1[i][1]=tool[i+row+row];
462                         C1[i][2]=C1[i][1]/(secondGroupingStart-1);
463                         
464                         C2[i][0]=tool[i+row];
465                         C2[i][1]=tool[i+row+row+row]; // var group 2 
466                         C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
467                 }
468                 
469                 for (int i = 0; i < row; i++){
470                         if (m->control_pressed) { return 0; }
471                         xbardiff = C1[i][0]-C2[i][0];
472                         denom = sqrt(C1[i][2]+C2[i][2]);
473                         Ts[i]=fabs(xbardiff/denom);
474                         if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
475                                 counter[i]++;
476                         }
477                 }
478                 
479                 return 0;
480                 
481         }catch(exception& e) {
482                 m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
483                 exit(1);
484         }
485 }
486 /***********************************************************/
487
488