]> git.donarmstrong.com Git - mothur.git/blob - mothurmetastats.cpp
rewrote metastats command in c++, added mothurRemove function to handle ~ error....
[mothur.git] / mothurmetastats.cpp
1 /*
2  *  mothurmetastats.cpp
3  *  Mothur
4  *
5  *  Created by westcott on 7/6/11.
6  *  Copyright 2011 Schloss Lab. All rights reserved.
7  *
8  */
9
10 #include "mothurmetastats.h"
11 #include "mothurfisher.h"
12
13 /***********************************************************/
14 MothurMetastats::MothurMetastats(double t, int n) {
15         try {
16                 m = MothurOut::getInstance(); 
17                 threshold = t;
18                 numPermutations = n;
19                 
20         }catch(exception& e) {
21                 m->errorOut(e, "MothurMetastats", "MothurMetastats");
22                 exit(1);
23         }       
24 }
25 /***********************************************************/
26 MothurMetastats::~MothurMetastats() {
27         try {
28                 
29                 
30         }catch(exception& e) {
31                 m->errorOut(e, "MothurMetastats", "~MothurMetastats");
32                 exit(1);
33         }       
34 }
35 /***********************************************************/
36 //main metastats function
37 int MothurMetastats::runMetastats(string outputFileName, vector< vector<double> >& data, int secondGroupingStart) {
38         try {
39                 int bflag = 0;
40                 row = data.size();               //numBins
41                 column = data[0].size(); //numGroups in subset
42                 int size = row*column;
43                 
44                 //consistent with original, but this should never be true
45                 if ((secondGroupingStart >= column) || (secondGroupingStart <= 0)) { m->mothurOut("[ERROR]: Check your g value."); m->mothurOutEndLine(); return 0; }
46                 
47                 //Initialize the matrices
48                 vector<double> pmatrix; pmatrix.resize(size, 0.0);
49                 vector<double> permuted; permuted.resize(size, 0.0);
50                 vector< vector<double> > storage; storage.resize(row);
51                 for (int i = 0; i < storage.size(); i++) { storage[i].resize(9, 0.0); }
52                 
53                 //Produces the sum of each column
54                 vector<double> total; total.resize(column, 0.0);
55                 vector<double> ratio; ratio.resize(column, 0.0);
56                 double total1 = 0.0; double total2 = 0.0;
57                 
58                 //total[i] = total abundance for group[i]
59                 for (int i = 0; i < column; i++) {
60                         for (int j = 0; j < row; j++) {
61                                 total[i] += data[j][i];
62                         }
63                 }
64                 
65                 //total for first grouping
66                 for (int i = 0; i < (secondGroupingStart-1); i++) { total1 += total[i]; }
67                 
68                 //total for second grouping
69                 for (int i = (secondGroupingStart-1); i < column; i++) { total2 += total[i]; }
70                 
71                 //Creates the ratios by first finding the minimum of totals
72                 double min = total[0];
73                 for (int i = 0; i < total.size(); i++) {
74                          if (total[i] < min) { min = total[i]; }
75                 }
76                 
77                 //sanity check
78                 if (min <= 0.0) { m->mothurOut("[ERROR]: the sum of one of the columns <= 0."); m->mothurOutEndLine(); return 0; }
79                 
80                 //Ratio time...
81                 for(int i = 0; i < ratio.size(); i++){  ratio[i] = total[i] / min; }
82                 
83                 //Change matrix into an array as received by R for compatibility - kept to be consistent with original
84                 int count = 0;
85                 for(int i = 0; i < column; i++){
86                         for(int j = 0; j < row; j++){
87                                 pmatrix[count]=data[j][i];
88                                 count++;
89                         }
90                 }
91                 
92                 if(row == 1){
93                         for (int i =0; i < column; i++){ pmatrix[i] /= ratio[i]; }
94                 }else {
95                         count = 0; int j=-1;
96                         
97                         for (int i=0; i < size; i++) {
98                                 if (count % row == 0) { j++; }
99                                 pmatrix[i] /= ratio[j];
100                                 count++; 
101                         }   
102                 }
103                 
104                 vector<double> permuted_ttests; permuted_ttests.resize(row, 0.0);
105                 vector<double> pvalues;                 pvalues.resize(row, 0.0);
106                 vector<double> tinitial;                tinitial.resize(row, 0.0);
107                 
108                 if (m->control_pressed) { return 1; }
109                 
110                 //Find the initial values for the matrix.
111                 start(pmatrix, secondGroupingStart, tinitial, storage);
112                 
113                 if (m->control_pressed) { return 1; }
114                 
115                 // Start the calculations.
116                 if ( (column == 2) || ((secondGroupingStart-1) < 8) || ((column-secondGroupingStart+1) < 8) ){ 
117                         
118                         vector<double> fish;    fish.resize(row, 0.0);
119                         vector<double> fish2;   fish2.resize(row, 0.0);
120                         
121                         for(int i = 0; i < row; i++){
122                                 
123                                 for(int j = 0; j < (secondGroupingStart-1); j++)                { fish[i] += data[i][j];        }
124                                 for(int j = (secondGroupingStart-1); j < column; j++)   { fish2[i] += data[i][j];       }
125                                 
126                                 //vector<double> tempData; tempData.resize(4, 0.0);
127                                 double f11, f12, f21, f22;
128                                 f11 = fish[i];
129                                 f12 = fish2[i];
130                                 f21 = total1 - fish[i];
131                                 f22 = total2 - fish2[i];
132                                 
133                                 double pre = 0.0;
134                                 
135                                 MothurFisher fisher;
136                                 pre = fisher.fexact(f11, f12, f21, f22);
137                                 
138                                 if (m->control_pressed) { return 1; }
139                                 
140                                 if (pre > 0.999999999)  { pre = 1.0; }
141                                 storage[i][8] = pre;
142                                 pvalues[i] = pre;
143                         }
144                         
145                 }else {
146         
147                         testp(permuted_ttests, permuted, pmatrix, secondGroupingStart, tinitial, pvalues);
148                         
149                         if (m->control_pressed) { return 1; }
150                         
151                         // Checks to make sure the matrix isn't sparse.
152                         vector<double> sparse;          sparse.resize(row, 0.0);
153                         vector<double> sparse2;         sparse2.resize(row, 0.0);
154                         
155                         int c = 0;
156                         
157                         for(int i = 0; i < row; i++){
158                                 
159                                 for(int j = 0; j < (secondGroupingStart-1); j++){       sparse[i] += data[i][j]; }
160                                 if(sparse[i] < (double)(secondGroupingStart-1)){        c++; }
161                                 
162                                 // ?<= for col
163                                 for(int j = (secondGroupingStart-1); j < column; j++){  sparse2[i] += data[i][j]; }
164                                 if( (sparse2[i] < (double)(column-secondGroupingStart+1))) { c++; }
165                                 
166                                 if (c == 2) {
167                                         c=0;
168                                         double f11,f12,f21,f22;
169                                         
170                                         f11=sparse[i];  sparse[i]=0;
171                                         f12=sparse2[i];  sparse2[i]=0;
172                                         f21 = total1 - f11;
173                                         f22 = total2 - f12;
174                                         
175                                         double pre = 0.0;
176                                         
177                                         MothurFisher fisher;
178                                         pre = fisher.fexact(f11, f12, f21, f22);
179                                         
180                                         if (m->control_pressed) { return 1; }
181                                         
182                                         if (pre > 0.999999999){
183                                                 pre = 1.0;
184                                         }
185                                         
186                                         storage[i][8] = pre;
187                                         pvalues[i] = pre;
188                                 }                               
189                         }
190                         
191                         bflag = 1;
192                 }
193
194                 // Calculates the mean of counts (not normalized)
195                 vector< vector<double> > temp; temp.resize(row);
196                 for (int i = 0; i < temp.size(); i++) { temp[i].resize(2, 0.0); }
197                 
198                 for (int j = 0; j < row; j++){
199                         if (m->control_pressed) { return 1; }
200                         
201                         for (int i = 1; i <= (secondGroupingStart-1); i++){ temp[j][0] += data[j][i-1]; }
202                         temp[j][0] /= (double)(secondGroupingStart-1);
203                         
204                         for(int i = secondGroupingStart; i <= column; i++){ temp[j][1] += data[j][i-1]; }
205                         temp[j][1] /= (double)(column-secondGroupingStart+1);
206                 }
207                 
208                 for(int i = 0; i < row; i++){
209                         if (m->control_pressed) { return 1; }
210                         
211                         storage[i][3]=temp[i][0];
212                         storage[i][7]=temp[i][1];
213                         storage[i][8]=pvalues[i];
214                 }
215                 
216                 // BACKUP checks
217                 cout.setf(ios::fixed, ios::floatfield); cout.setf(ios::showpoint);
218                 for (int i = 0; i < row; i++){
219                         
220                         if (m->control_pressed) { return 1; }
221                         
222                         if(pvalues[i] < threshold){
223                                 m->mothurOut("Feature " + toString((i+1)) + " is significant, p = "); 
224                                 cout << pvalues[i];
225                                 m->mothurOutJustToLog(toString(pvalues[i])); m->mothurOutEndLine();
226                         }       
227                 }
228                 
229                 // And now we write the files to a text file.
230                 struct tm *local;
231                 time_t t; t = time(NULL);
232                 local = localtime(&t);
233                 
234                 ofstream out;
235                 m->openOutputFile(outputFileName, out);
236                 out.setf(ios::fixed, ios::floatfield); out.setf(ios::showpoint);
237                                                   
238                 out << "Local time and date of test: " << asctime(local) << endl;
239                 out << "# rows = " << row << ", # col = " << column << ", g = " << secondGroupingStart << endl << endl;
240                 if (bflag == 1){ out << numPermutations << " permutations" << endl << endl;     }
241                 
242                 //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
243                 //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
244                 out << "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n";
245                 
246                 for(int i = 0; i < row; i++){
247                         if (m->control_pressed) { out.close(); return 0; }
248                         
249                         out << (i+1);
250                         for(int j = 0; j < 9; j++){ out << '\t' << storage[i][j]; }
251                         out << endl;
252                 }  
253                 
254                 out << endl << endl;
255                 out.close();
256                 
257                 return 0;
258         
259         }catch(exception& e) {
260                 m->errorOut(e, "MothurMetastats", "runMetastats");
261                 exit(1);
262         }       
263 }
264 /***********************************************************/
265 //Find the initial values for the matrix
266 int MothurMetastats::start(vector<double>& Imatrix, int secondGroupingStart, vector<double>& initial, vector< vector<double> >& storage) {
267         try {
268                 
269                 int a = row; a*=4;
270                 
271                 double xbardiff = 0.0; double denom = 0.0;
272                 vector<double> store;   store.resize(a, 0.0);
273                 vector<double> tool;    tool.resize(a, 0.0);
274                 vector< vector<double> > C1; C1.resize(row);
275                 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
276                 vector< vector<double> > C2; C2.resize(row);
277                 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
278                 
279                 meanvar(Imatrix, secondGroupingStart, store);
280                 
281                 if (m->control_pressed) { return 0; }
282                 
283                 //copy store into tool
284                 tool = store;
285                 
286                 for (int i = 0; i < row; i++){
287                         C1[i][0]=tool[i]; //mean group 1
288                         storage[i][0]=C1[i][0];
289                         C1[i][1]=tool[i+row+row]; // var group 1
290                         storage[i][1]=C1[i][1];
291                         C1[i][2]=C1[i][1]/(secondGroupingStart-1);
292                         storage[i][2]=sqrt(C1[i][2]);
293                         
294                         C2[i][0]=tool[i+row]; // mean group 2
295                         storage[i][4]=C2[i][0];    
296                         C2[i][1]=tool[i+row+row+row]; // var group 2 
297                         storage[i][5]=C2[i][1];        
298                         C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
299                         storage[i][6]=sqrt(C2[i][2]);   
300                 }
301                 
302                 if (m->control_pressed) { return 0; }
303                 
304                 for (int i = 0; i < row; i++){
305                         xbardiff = C1[i][0]-C2[i][0];
306                         denom = sqrt(C1[i][2]+C2[i][2]);
307                         initial[i]=fabs(xbardiff/denom);
308                 }       
309
310                 return 0; 
311                 
312         }catch(exception& e) {
313                 m->errorOut(e, "MothurMetastats", "start");
314                 exit(1);
315         }       
316 }
317 /***********************************************************/
318 int MothurMetastats::meanvar(vector<double>& pmatrix, int secondGroupingStart, vector<double>& store) {
319         try {
320                 vector<double> temp;    temp.resize(row, 0.0);
321                 vector<double> temp2;   temp2.resize(row, 0.0);
322                 vector<double> var;             var.resize(row, 0.0);
323                 vector<double> var2;    var2.resize(row, 0.0);
324                 
325                 double a = secondGroupingStart-1;
326                 double b = column - a;
327                 int m = a * row;
328                 int n = row * column;
329                 
330                 for (int i = 0; i < m; i++)             { temp[i%row] += pmatrix[i];    }
331                 for (int i = 0; i < n; i++)             { temp2[i%row]+= pmatrix[i];    }
332                 for (int i = 0; i < row; i++)   { temp2[i] -= temp[i];          }
333                 for (int i = 0; i <= row-1;i++) {
334                         store[i] = temp[i]/a;
335                         store[i+row]=temp2[i]/b;
336                 }
337                 
338                 //That completes the mean calculations.
339                 
340                 for (int i = 0; i < m; i++)             { var[i%row] += pow((pmatrix[i]-store[i%row]),2);               }
341                 for (int i = m; i < n; i++)             { var2[i%row]+= pow((pmatrix[i]-store[(i%row)+row]),2); }
342                 for (int i = 0; i <= row-1; i++){
343                         store[i+2*row]=var[i]/(a-1);
344                         store[i+3*row]=var2[i]/(b-1);
345                 }
346                 
347                 // That completes var calculations.
348                 
349                 return 0;
350                 
351         }catch(exception& e) {
352                 m->errorOut(e, "MothurMetastats", "meanvar");
353                 exit(1);
354         }       
355 }
356 /***********************************************************/
357 int MothurMetastats::testp(vector<double>& permuted_ttests, vector<double>& permuted, vector<double>& Imatrix, int secondGroupingStart, vector<double>& Tinitial, vector<double>& ps) {
358         try {
359                 
360                 vector<double> Tvalues;         Tvalues.resize(row, 0.0);
361                 vector<double> counter;         counter.resize(row, 0.0);
362                 int a, b, n;
363                 
364                 a = numPermutations;
365                 b = row;
366                 n = a*b;
367                 
368                 for (int j = 1; j <= row; j++)  {       
369                         if (m->control_pressed) { return 0; }
370                         permute_matrix(Imatrix, permuted, secondGroupingStart, Tvalues, Tinitial, counter);     
371                 }
372                 
373                 for(int j = 0; j < row; j++)    {       
374                         if (m->control_pressed) { return 0; }
375                         ps[j] = ((counter[j]+1)/(double)(a+1)); 
376                 }
377                 
378                 return 0;
379                 
380         }catch(exception& e) {
381                 m->errorOut(e, "MothurMetastats", "testp");
382                 exit(1);
383         }       
384 }       
385 /***********************************************************/
386 int MothurMetastats::permute_matrix(vector<double>& Imatrix, vector<double>& permuted, int secondGroupingStart, vector<double>& trial_ts, vector<double>& Tinitial, vector<double>& counter1){
387         try {
388         
389                 vector<int> y; y.resize(column, 0);
390                 for (int i = 1; i <= column; i++){ y[i-1] = i; }
391                 
392                 permute_array(y); 
393                 
394                 int f = 0; int c = 0; int k = 0;
395                 for (int i = 0; i < column; i++){
396                         
397                         if (m->control_pressed) { return 0; }
398                         
399                         f = y[i]; //column number
400                         c = 1;
401                         c *= (f-1);
402                         c *= row;
403                         if (f == 1){ c = 0; } // starting value position in the Imatrix
404                         
405                         for(int j = 1; j <= row; j++){
406                                 permuted[k] = Imatrix[c];
407                                 c++; k++;
408                         }
409                 }
410                 
411                 calc_twosample_ts(permuted, secondGroupingStart, trial_ts, Tinitial, counter1);
412                 
413                 return 0;
414                 
415         }catch(exception& e) {
416                 m->errorOut(e, "MothurMetastats", "permute_matrix");
417                 exit(1);
418         }       
419 }
420 /***********************************************************/
421 int MothurMetastats::permute_array(vector<int>& array) {
422         try {
423                 static int seeded = 0;
424                 
425                 if (! seeded) {
426                         seeded = 1;
427                         srand(time(NULL));
428                 }
429                 
430                 for (int i = 0; i < array.size(); i++) {
431                         if (m->control_pressed) { return 0; }
432                         
433                         int selection = rand() % (array.size() - i);
434                         int tmp = array[i + selection];
435                         array[i + selection] = array[i];
436                         array[i] = tmp;
437                 }
438                 
439                 return 0;
440                 
441         }catch(exception& e) {
442                 m->errorOut(e, "MothurMetastats", "permute_array");
443                 exit(1);
444         }       
445 }
446 /***********************************************************/
447 int MothurMetastats::calc_twosample_ts(vector<double>& Pmatrix, int secondGroupingStart, vector<double>& Ts, vector<double>& Tinitial, vector<double>& counter) {
448         try {
449                 int a = row * 4;
450                 
451                 vector< vector<double> > C1; C1.resize(row);
452                 for (int i = 0; i < C1.size(); i++) { C1[i].resize(3, 0.0); }
453                 vector< vector<double> > C2; C2.resize(row);
454                 for (int i = 0; i < C2.size(); i++) { C2[i].resize(3, 0.0); }
455                 vector<double> storage; storage.resize(row, 0.0);
456                 vector<double> tool;    tool.resize(row, 0.0);
457                 double xbardiff = 0.0; double denom = 0.0;
458                 
459                 meanvar(Pmatrix, secondGroupingStart, storage);
460                 
461                 for(int i = 0;i <= (a-1); i++) {        
462                         if (m->control_pressed) { return 0; }
463                         tool[i] = storage[i];   
464                 }
465                 
466                 for (int i = 0; i < row; i++){
467                         if (m->control_pressed) { return 0; }
468                         C1[i][0]=tool[i];
469                         C1[i][1]=tool[i+row+row];
470                         C1[i][2]=C1[i][1]/(secondGroupingStart-1);
471                         
472                         C2[i][0]=tool[i+row];
473                         C2[i][1]=tool[i+row+row+row]; // var group 2 
474                         C2[i][2]=C2[i][1]/(column-secondGroupingStart+1);
475                 }
476                 
477                 for (int i = 0; i < row; i++){
478                         if (m->control_pressed) { return 0; }
479                         xbardiff = C1[i][0]-C2[i][0];
480                         denom = sqrt(C1[i][2]+C2[i][2]);
481                         Ts[i]=fabs(xbardiff/denom);
482                         if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
483                                 counter[i]++;
484                         }
485                 }
486                 
487                 return 0;
488                 
489         }catch(exception& e) {
490                 m->errorOut(e, "MothurMetastats", "calc_twosample_ts");
491                 exit(1);
492         }
493 }
494 /***********************************************************/
495
496