]> git.donarmstrong.com Git - mothur.git/blob - metastats2.c
removed various build warnings
[mothur.git] / metastats2.c
1
2 #include "metastats.h"
3
4 //The following code has been modified using the original Metastats program from White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).
5
6 int metastat_main (char* outputFileName, int numRows, int numCols, double threshold, int numPermutations, double** data, int secondGroupingStart){
7         
8         int size,c=0,i=0,j=0,counter=0, bflag=0; 
9         int B=numPermutations;
10         int row = numRows;
11         int col = numCols;
12         int g = secondGroupingStart;
13         double thresh=threshold;
14         double min=0; 
15         
16         char output[1024];
17         strcpy(output, outputFileName);
18         FILE *out;
19         
20         if (g>=col || g<=0){
21                 printf("Check your g value\n"); 
22         }
23         
24         // Initialize the matrices
25         size = row*col;
26         double matrix[row][col];
27         double pmatrix[size],permuted[size];  
28         double storage[row][9];
29         
30         for (i=0;i<row;i++){
31                 for (j =0;j<9;j++){
32                         storage[i][j]=0;                
33                 }       
34         }
35         
36         for(i=0; i<row; i++){
37                 for(j=0; j<col;j++){
38                         matrix[i][j]=data[i][j];
39                         pmatrix[c]=0; // initializing to zero
40                         permuted[c]=0;
41                         c++;
42                 }
43         }
44         
45         // Produces the sum of each column
46         double total[col],total1=0,total2=0;
47         double ratio[col];
48         
49         for(i=0;i<col;i++){
50                 total[i]=0;
51                 ratio[i]=0; }
52         
53         for(i=0; i<col; i++){
54                 for(j=0;j<row;j++){
55                         total[i]=total[i]+matrix[j][i];                 
56                 }
57         }
58         
59         for(i=0;i<g-1;i++){
60                 total1=total1+total[i];}
61         
62         for(i=g-1;i<col;i++){
63                 total2=total2+total[i];}
64         
65         
66         // Creates the ratios by first finding the minimum of totals
67         min = total[0];
68         if (col==2){
69                 if (total[0]<total[1]){
70                         min = total[1];}        
71         }
72         if (col >2){
73                 for(i=1;i<col;i++){
74                         if (min > total[i]){
75                                 min = total[i];}
76                 }
77         }
78         if (min<=0){
79                 printf("Error, the sum of one of the columns <= 0.");
80                 return 0;       
81         }
82         
83         
84         // Ratio time...
85         for(i=0;i<col;i++){
86                 ratio[i]=total[i]/min;
87         }
88         
89         // Change matrix into an array as received by R for compatibility.
90         
91         c=0;
92         for(i=0;i<col;i++){
93                 for(j=0;j<row;j++){
94                         pmatrix[c]=matrix[j][i];
95                         c++;
96                 }
97         }
98         
99         if(row == 1){
100                 for (i =0; i<col;i++){
101                         pmatrix[i]=pmatrix[i]/ratio[i];
102                 }
103         }
104         else {
105                 counter = 0;
106                 j=-1;
107                 for (i=0; i<size; i++) {
108                         if (counter % row == 0) {
109                                 j++;
110                         }
111                         pmatrix[i]=pmatrix[i]/ratio[j];
112                         counter++; 
113                 }   
114         }
115         // pass everything to the rest of the code using pointers. then 
116         // write to output file. below pointers for most of the values are 
117         // created to send everything by reference.
118         
119         int ptt_size, *permutes,*nc,*nr,*gvalue;
120         
121         nc = &col;
122         nr = &row;
123         gvalue = &g;
124         
125         permutes = &B;
126         ptt_size = B*row;
127         
128         //changing ptt_size to row
129         double permuted_ttests[row], pvalues[row], tinitial[row];
130         
131         for(i=0;i<row;i++){
132                 permuted_ttests[i]=0;}
133         
134         for(i=0;i<row;i++){
135                 pvalues[i]=0;
136                 tinitial[i]=0; }
137         
138         // Find the initial values for the matrix.
139         start(pmatrix,gvalue,nr,nc,tinitial,storage);
140         
141         // Start the calculations.
142         
143         if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){  
144                 
145                 double fish[row], fish2[row];
146                 for(i=0;i<row;i++){
147                         fish[i]=0;
148                         fish2[i]=0;}
149                 
150                 for(i=0;i<row;i++){
151                         
152                         for(j=0;j<g-1;j++){
153                                 fish[i]=fish[i]+matrix[i][j];
154                         }
155                         
156                         for(j=g-1;j<col;j++){ 
157                                 fish2[i]=fish2[i]+matrix[i][j];
158                         }
159                         
160                         double  f11,f12,f21,f22;
161                         
162                         f11=fish[i];
163                         f12=fish2[i];
164                         
165                         f21=total1-f11;
166                         f22=total2-f12;
167                         
168                         double data[] = {f11, f12, f21, f22};
169                         
170                         // CONTINGENGCY TABLE:
171                         //   f11   f12
172                         //   f21   f22
173                         
174                         int *nr, *nc, *ldtabl, *work;
175                         int nrow=2, ncol=2, ldtable=2, workspace=100000;
176                         double *expect, *prc, *emin,*prt,*pre;
177                         double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
178                         
179                         nr = &nrow;
180                         nc = &ncol;
181                         ldtabl=&ldtable;
182                         work = &workspace;
183                         
184                         expect = &e;
185                         prc = &prc1;
186                         emin=&emin1;
187                         prt=&prt1;
188                         pre=&pre1;
189                         
190                         fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
191                         
192                         if (*pre>.999999999){
193                                 *pre=1;
194                         }
195                         storage[i][8] = *pre;
196                         pvalues[i]=*pre;
197                 }
198         }
199         else{
200                 
201                 testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
202                 
203                 // Checks to make sure the matrix isn't sparse.
204                 double sparse[row], sparse2[row];
205                 for(i=0;i<row;i++){
206                         sparse[i]=0;
207                         sparse2[i]=0;}
208                 
209                 c=0;    
210                 for(i=0;i<row;i++){
211                         
212                         for(j=0;j<g-1;j++){
213                                 sparse[i]=sparse[i]+matrix[i][j];
214                         }
215                         
216                         if(sparse[i] < (double)(g-1)){
217                                 c++;
218                         }
219                         for(j=g-1;j<col;j++){ // ?<= for col
220                                 sparse2[i]=sparse2[i]+matrix[i][j];
221                         }
222                         
223                         if( (sparse2[i] <(double)(col-g+1))) {
224                                 c++;
225                         }
226                         
227                         if (c==2){
228                                 c=0;
229                                 
230                                 double  f11,f12,f21,f22;
231                                 
232                                 f11=sparse[i];
233                                 sparse[i]=0;
234                                 
235                                 f12=sparse2[i];
236                                 sparse2[i]=0;
237                                 
238                                 f21=total1-f11;
239                                 f22=total2-f12;
240                                 
241                                 double data[] = {f11, f12, f21, f22};
242                                 
243                                 int *nr, *nc, *ldtabl, *work;
244                                 int nrow=2, ncol=2, ldtable=2, workspace=10000000; // I added two zeros for larger data sets
245                                 double *expect, *prc, *emin,*prt,*pre;
246                                 double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
247                                 
248                                 nr = &nrow;
249                                 nc = &ncol;
250                                 ldtabl=&ldtable;
251                                 work = &workspace;
252                                 
253                                 expect = &e;
254                                 prc = &prc1;
255                                 emin=&emin1;
256                                 prt=&prt1;
257                                 pre=&pre1;
258                                 
259                                 fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
260                                 
261                                 if (*pre>.999999999){
262                                         *pre=1;
263                                 }
264                                 storage[i][8] = *pre;
265                                 pvalues[i]=*pre;
266                         }
267                 }  
268                 // End of else statement
269                 bflag = 1;
270         }
271         
272         // Calculates the mean of counts (not normalized)
273         double temp[row][2];
274         
275         for(j=0;j<row;j++){
276                 for(i=0;i<2;i++){
277                         temp[j][i]=0;
278                 }
279         }
280         
281         for (j=0;j<row;j++){
282                 for (i=1; i<=(g-1); i++){
283                         temp[j][0]=temp[j][0]+matrix[j][i-1];
284                 }
285                 temp[j][0]= (double) temp[j][0]/(g-1);
286                 for(i=g;i<=col;i++){
287                         temp[j][1]=temp[j][1]+matrix[j][i-1];
288                 }
289                 temp[j][1]= (double) temp[j][1]/(col-g+1);
290         }
291         
292         for(i=0;i<row;i++){
293                 storage[i][3]=temp[i][0];
294                 storage[i][7]=temp[i][1];
295                 storage[i][8]=pvalues[i];
296         }
297         
298         // BACKUP checks
299         
300         for (i=0;i<row;i++){
301                 if(pvalues[i]<thresh){
302                         printf("Feature %d is significant, p = %.10lf \n",i+1,pvalues[i]);
303                 }       
304         }
305         
306         // And now we write the files to a text file.
307         struct tm *local;
308         time_t t;
309         t = time(NULL);
310         local = localtime(&t);
311         
312         out = fopen(output,"w");
313         
314         fprintf(out,"Local time and date of test: %s\n", asctime(local));
315         fprintf(out,"# rows = %d, # col = %d, g = %d\n\n",row,col,g);
316         if (bflag == 1){
317                 fprintf(out,"%d permutations\n\n",B);   
318         }
319         
320         //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
321         //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
322         fprintf(out, "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n");
323     
324         for(i=0; i<row; i++){
325                 fprintf(out,"%d",(i+1));
326                 
327                 for(j=0; j<9;j++){
328                         fprintf(out,"\t%.12lf",storage[i][j]);
329                 }
330                 fprintf(out,"\n");
331         }  
332         
333         fprintf(out,"\n \n");
334         
335         // fclose(jobj);
336         fclose(out);
337         
338         return 0;
339 }
340
341 void testp(double *permuted_ttests,int *B,double *permuted,
342                    double *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double 
343                    *ps) {
344         
345         double Tvalues[*nr];
346         int a, b, n, j;
347         
348         a = *B;
349         b = *nr;
350         n = a*b;
351         
352         double counter[b];
353         
354         for(j=0;j<b;j++){
355                 counter[j]=0;
356         }    
357         
358         for (j=1; j<=*B; j++){
359                 permute_matrix(Imatrix,nc,nr,permuted,g,Tvalues,Tinitial,counter);
360                 // for(i=0;i<*nr;i++){
361                 //   permuted_ttests[k]=fabs(Tvalues[i]);
362                 //    k++;
363     }
364         
365         
366         for(j=0;j<*nr;j++){
367                 ps[j]=((counter[j]+1)/(double)(a+1));
368         }
369 }       
370
371 void permute_matrix(double *Imatrix,int *nc,int *nr,double *permuted,
372                                         int *g,double *trial_ts,double *Tinitial,double *counter1){
373         
374         int i=0,j=0,n=0,a=0,b=0,f=0,c=0,k=0;
375         
376         a = *nr; // number of rows
377         b = *nc;
378         n = a*b;
379         
380         int y[b];
381         
382         for (i=1; i<=*nc; i++){
383                 y[i-1] = i;
384         }
385         
386         permute_array(y, b); 
387         
388         for (i=0; i<*nc; i++){
389                 f = y[i]; //column number
390                 c=1;
391                 c*=(f-1);
392                 c*=a;
393                 if (f == 1){
394                         c = 0;
395                 } // starting value position in the Imatrix
396                 for(j=1; j<=*nr; j++){
397                         permuted[k] = Imatrix[c];
398                         c++;
399                         k++;
400                 }
401         }
402         
403         calc_twosample_ts(permuted,g,nc,nr,trial_ts,Tinitial,counter1);
404 }
405
406 void permute_array(int *array, int n) {
407         static int seeded = 0;
408         int i;
409         
410         if (! seeded) {
411                 seeded = 1;
412                 srand(time(NULL));
413         }
414         
415         for (i = 0; i < n; i++) {
416                 int selection = rand() % (n - i);
417                 int tmp = array[i + selection];
418                 array[i + selection] = array[i];
419                 array[i] = tmp;
420         }
421 }
422
423 void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,
424                                            double *Ts,double *Tinitial,double *counter) {
425         int i,a;
426         a = *nr;
427         a*=4;
428         
429         double C1[*nr][3], C2[*nr][3], storage[a],tool[a];
430         double nrows,ncols,gvalue, xbardiff=0, denom=0;
431         
432         nrows = (double) *nr;
433         ncols = (double) *nc;
434         gvalue= (double) *g;
435         
436     meanvar(Pmatrix,g,nr,nc,storage);
437     for(i=0;i<=a-1;i++){
438                 tool[i]=storage[i];
439     }
440     for (i=0; i<*nr;i++){
441                 C1[i][0]=tool[i];
442                 C1[i][1]=tool[i+*nr+*nr];
443                 C1[i][2]=C1[i][1]/(gvalue-1);
444                 
445                 C2[i][0]=tool[i+*nr];
446                 C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
447                 C2[i][2]=C2[i][1]/(ncols-gvalue+1);
448     }
449     
450     for (i=0; i<*nr; i++){
451                 xbardiff = C1[i][0]-C2[i][0];
452                 denom = sqrt(C1[i][2]+C2[i][2]);
453                 Ts[i]=fabs(xbardiff/denom);
454                 if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
455                         counter[i]++;
456                 }
457     }   
458 }
459
460 void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
461         double temp[*nr], temp2[*nr],var[*nr],var2[*nr],a,b;
462         
463         int i,m,k,l,n;
464         
465         a = (double) *g-1;          
466         b = (double) (*nc-a);
467         
468         for (i = 0; i<*nr; i++){
469                 temp[i]=0;
470                 temp2[i]=0;
471                 var[i]=0;
472                 var2[i]=0;
473         }
474         
475         k = *nr; // number of rows 
476         l = *nc;
477         n = k*l;        
478         
479     m=0;
480     m=*g-1;
481     k=*nr;
482     m*=k; // m = g * nr now
483     for (i=0;i<m;i++){
484                 temp[i%k]=temp[i%k]+pmatrix[i];
485     }
486     for (i=0;i<n;i++){
487                 temp2[i%k]=temp2[i%k]+pmatrix[i];
488     }
489     for (i=0;i<*nr;i++){
490                 temp2[i]=temp2[i]-temp[i];
491     }
492     for (i=0;i<=*nr-1;i++){
493                 store[i]=temp[i]/a;
494                 store[i+*nr]=temp2[i]/b;
495     }
496     
497     // That completes the mean calculations.
498     
499     for (i=0;i<m;i++){
500                 var[i%k]=var[i%k]+pow((pmatrix[i]-store[i%k]),2);
501     }
502     for (i=m;i<n;i++){
503                 var2[i%k]=var2[i%k]+pow((pmatrix[i]-store[(i%k)+*nr]),2);
504     }
505     
506     for (i=0;i<=*nr-1;i++){
507                 store[i+2*k]=var[i]/(a-1);
508                 store[i+3*k]=var2[i]/(b-1);
509     }
510     // That completes var calculations.
511 }
512
513 void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
514                    double storage[][9]){
515         int i, a = *nr;
516         a*=4;
517         
518         double store[a], tool[a], C1[*nr][3], C2[*nr][3];
519         double nrows,ncols,gvalue, xbardiff=0, denom=0;
520         
521         nrows = (double) *nr;
522         ncols = (double) *nc;
523         gvalue= (double) *g;
524         
525         meanvar(Imatrix,g,nr,nc,store);
526         
527         for(i=0;i<=a-1;i++){
528                 tool[i]=store[i];
529         }
530         for (i=0; i<*nr;i++){
531                 C1[i][0]=tool[i]; //mean group 1
532                 storage[i][0]=C1[i][0];
533                 C1[i][1]=tool[i+*nr+*nr]; // var group 1
534                 storage[i][1]=C1[i][1];
535                 C1[i][2]=C1[i][1]/(gvalue-1);
536                 storage[i][2]=sqrt(C1[i][2]);
537                 
538                 C2[i][0]=tool[i+*nr]; // mean group 2
539                 storage[i][4]=C2[i][0];    
540                 C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
541                 storage[i][5]=C2[i][1];        
542                 C2[i][2]=C2[i][1]/(ncols-gvalue+1);
543                 storage[i][6]=sqrt(C2[i][2]);   
544         }
545         for (i=0; i<*nr; i++){
546                 xbardiff = C1[i][0]-C2[i][0];
547                 denom = sqrt(C1[i][2]+C2[i][2]);
548                 initial[i]=fabs(xbardiff/denom);
549         }                                                                                       
550 }
551
552
553
554