4 //The following code has been modified using the original Metastats program from White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).
6 int metastat_main (char* outputFileName, int numRows, int numCols, double threshold, int numPermutations, double** data, int secondGroupingStart){
8 int size,c=0,i=0,j=0,k,counter=0, bflag=0;
12 int g = secondGroupingStart;
13 double thresh=threshold;
14 double placeholder=0,min=0;
17 strcpy(output, outputFileName);
21 printf("Check your g value\n");
24 // Initialize the matrices
27 double ** matrix, * pmatrix, * permuted, ** storage;
28 matrix = malloc(row*sizeof(double *));
29 storage = malloc(row*sizeof(double *));
30 for (i = 0; i<row; i++){
31 matrix[i] = malloc(col*sizeof(double));
33 for (i = 0; i<row;i++){
34 storage[i] = malloc(9*sizeof(double));
37 pmatrix = (double *) malloc(size*sizeof(double *));
38 permuted = (double *) malloc(size*sizeof(double *));
44 matrix[i][j]=data[i][j];
45 pmatrix[c]=0; // initializing to zero
51 // Produces the sum of each column
52 double total[col],total1=0,total2=0;
61 total[i]=total[i]+matrix[j][i];
66 total1=total1+total[i];}
69 total2=total2+total[i];}
72 // Creates the ratios by first finding the minimum of totals
75 if (total[0]<total[1]){
85 printf("Error, the sum of one of the columns <= 0.");
92 ratio[i]=total[i]/min;
95 // Change matrix into an array as received by R for compatibility.
100 pmatrix[c]=matrix[j][i];
106 for (i =0; i<col;i++){
107 pmatrix[i]=pmatrix[i]/ratio[i];
113 for (i=0; i<size; i++) {
114 if (counter % row == 0) {
117 pmatrix[i]=pmatrix[i]/ratio[j];
121 // pass everything to the rest of the code using pointers. then
122 // write to output file. below pointers for most of the values are
123 // created to send everything by reference.
125 int ptt_size, *permutes,*nc,*nr,*gvalue;
134 //changing ptt_size to row
135 double * permuted_ttests, * pvalues, * tinitial;
136 permuted_ttests = (double *) malloc(size*sizeof(double *));
137 pvalues = (double *) malloc(size*sizeof(double *));
138 tinitial = (double *) malloc(size*sizeof(double *));
141 permuted_ttests[i]=0;}
147 // Find the initial values for the matrix.
149 start(pmatrix,gvalue,nr,nc,tinitial,storage);
151 // Start the calculations.
153 if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){
155 double * fish, *fish2;
156 fish = (double *) malloc(size*sizeof(double *));
157 fish2 = (double *) malloc(size*sizeof(double *));
166 fish[i]=fish[i]+matrix[i][j];
169 for(j=g-1;j<col;j++){
170 fish2[i]=fish2[i]+matrix[i][j];
173 double f11,f12,f21,f22;
181 double data[] = {f11, f12, f21, f22};
183 // CONTINGENGCY TABLE:
187 int *nr, *nc, *ldtabl, *work;
188 int nrow=2, ncol=2, ldtable=2;
189 int workspace = 6*(row*col*sizeof(double *));
190 double *expect, *prc, *emin,*prt,*pre;
191 double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
193 prt = (double *) malloc(size*sizeof(double *));
194 prc = (double *) malloc(size*sizeof(double *));
207 fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
209 if (*pre>.999999999){
212 storage[i][8] = *pre;
217 printf("here before testp\n");
218 testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
220 // Checks to make sure the matrix isn't sparse.
221 double * sparse, * sparse2;
222 sparse = (double *) malloc(size*sizeof(double *));
223 sparse2 = (double *) malloc(size*sizeof(double *));
233 sparse[i]=sparse[i]+matrix[i][j];
236 if(sparse[i] < (double)(g-1)){
239 for(j=g-1;j<col;j++){ // ?<= for col
240 sparse2[i]=sparse2[i]+matrix[i][j];
243 if( (sparse2[i] <(double)(col-g+1))) {
250 double f11,f12,f21,f22;
261 double data[] = {f11, f12, f21, f22};
263 int *nr, *nc, *ldtabl, *work;
264 int nrow=2, ncol=2, ldtable=2, workspace=INT_MAX; // I added two zeros for larger data sets
265 double *expect, *prc, *emin,*prt,*pre;
266 double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
278 printf("here before fexact2\n");
279 fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
281 if (*pre>.999999999){
284 storage[i][8] = *pre;
288 // End of else statement
292 // Calculates the mean of counts (not normalized)
294 temp = malloc(row*sizeof(double *));
295 for (i = 0; i<row; i++){
296 temp[i] = malloc(col*sizeof(double));
306 for (i=1; i<=(g-1); i++){
307 temp[j][0]=temp[j][0]+matrix[j][i-1];
309 temp[j][0]= (double) temp[j][0]/(g-1);
311 temp[j][1]=temp[j][1]+matrix[j][i-1];
313 temp[j][1]= (double) temp[j][1]/(col-g+1);
317 storage[i][3]=temp[i][0];
318 storage[i][7]=temp[i][1];
319 storage[i][8]=pvalues[i];
325 if(pvalues[i]<thresh){
326 printf("Feature %d is significant, p = %.10lf \n",i+1,pvalues[i]);
330 // And now we write the files to a text file.
334 local = localtime(&t);
336 out = fopen(output,"w");
338 fprintf(out,"Local time and date of test: %s\n", asctime(local));
339 fprintf(out,"# rows = %d, # col = %d, g = %d\n\n",row,col,g);
341 fprintf(out,"%d permutations\n\n",B);
344 //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
345 //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
346 fprintf(out, "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n");
348 for(i=0; i<row; i++){
349 fprintf(out,"%d",(i+1));
352 fprintf(out,"\t%.12lf",storage[i][j]);
357 fprintf(out,"\n \n");
365 void testp(double *permuted_ttests,int *B,double *permuted,
366 double *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double
370 int a, b, n, i, j,k=0;
382 for (j=1; j<=*B; j++){
383 permute_matrix(Imatrix,nc,nr,permuted,g,Tvalues,Tinitial,counter);
384 // for(i=0;i<*nr;i++){
385 // permuted_ttests[k]=fabs(Tvalues[i]);
391 ps[j]=((counter[j]+1)/(double)(a+1));
395 void permute_matrix(double *Imatrix,int *nc,int *nr,double *permuted,
396 int *g,double *trial_ts,double *Tinitial,double *counter1){
398 int i=0,j=0,n=0,a=0,b=0,f=0,c=0,k=0;
400 a = *nr; // number of rows
406 for (i=1; i<=*nc; i++){
412 for (i=0; i<*nc; i++){
413 f = y[i]; //column number
419 } // starting value position in the Imatrix
420 for(j=1; j<=*nr; j++){
421 permuted[k] = Imatrix[c];
427 calc_twosample_ts(permuted,g,nc,nr,trial_ts,Tinitial,counter1);
430 void permute_array(int *array, int n) {
431 static int seeded = 0;
439 for (i = 0; i < n; i++) {
440 int selection = rand() % (n - i);
441 int tmp = array[i + selection];
442 array[i + selection] = array[i];
447 void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,
448 double *Ts,double *Tinitial,double *counter) {
453 double C1[*nr][3], C2[*nr][3], storage[a],tool[a];
454 double nrows,ncols,gvalue, xbardiff=0, denom=0;
456 nrows = (double) *nr;
457 ncols = (double) *nc;
460 meanvar(Pmatrix,g,nr,nc,storage);
464 for (i=0; i<*nr;i++){
466 C1[i][1]=tool[i+*nr+*nr];
467 C1[i][2]=C1[i][1]/(gvalue-1);
469 C2[i][0]=tool[i+*nr];
470 C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2
471 C2[i][2]=C2[i][1]/(ncols-gvalue+1);
474 for (i=0; i<*nr; i++){
475 xbardiff = C1[i][0]-C2[i][0];
476 denom = sqrt(C1[i][2]+C2[i][2]);
477 Ts[i]=fabs(xbardiff/denom);
478 if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
484 void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
485 double temp[*nr], temp2[*nr],var[*nr],var2[*nr],a,b;
490 b = (double) (*nc-a);
492 for (i = 0; i<*nr; i++){
499 k = *nr; // number of rows
506 m*=k; // m = g * nr now
508 temp[i%k]=temp[i%k]+pmatrix[i];
511 temp2[i%k]=temp2[i%k]+pmatrix[i];
514 temp2[i]=temp2[i]-temp[i];
516 for (i=0;i<=*nr-1;i++){
518 store[i+*nr]=temp2[i]/b;
521 // That completes the mean calculations.
524 var[i%k]=var[i%k]+pow((pmatrix[i]-store[i%k]),2);
527 var2[i%k]=var2[i%k]+pow((pmatrix[i]-store[(i%k)+*nr]),2);
530 for (i=0;i<=*nr-1;i++){
531 store[i+2*k]=var[i]/(a-1);
532 store[i+3*k]=var2[i]/(b-1);
534 // That completes var calculations.
537 void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
542 double store[a], tool[a], C1[*nr][3], C2[*nr][3];
543 double nrows,ncols,gvalue, xbardiff=0, denom=0;
545 nrows = (double) *nr;
546 ncols = (double) *nc;
549 meanvar(Imatrix,g,nr,nc,store);
555 for (i=0; i<*nr;i++){
556 C1[i][0]=tool[i]; //mean group 1
557 storage[i][0]=C1[i][0];
558 C1[i][1]=tool[i+*nr+*nr]; // var group 1
559 storage[i][1]=C1[i][1];
560 C1[i][2]=C1[i][1]/(gvalue-1);
561 storage[i][2]=sqrt(C1[i][2]);
563 C2[i][0]=tool[i+*nr]; // mean group 2
564 storage[i][4]=C2[i][0];
565 C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2
566 storage[i][5]=C2[i][1];
567 C2[i][2]=C2[i][1]/(ncols-gvalue+1);
568 storage[i][6]=sqrt(C2[i][2]);
572 for (i=0; i<*nr; i++){
573 xbardiff = C1[i][0]-C2[i][0];
574 denom = sqrt(C1[i][2]+C2[i][2]);
575 initial[i]=fabs(xbardiff/denom);