]> git.donarmstrong.com Git - mothur.git/blobdiff - metastats2.c
added pcr.seqs command. fixed bug in rarefacftion.single that caused parsing error...
[mothur.git] / metastats2.c
diff --git a/metastats2.c b/metastats2.c
deleted file mode 100644 (file)
index 70edc3d..0000000
+++ /dev/null
@@ -1,559 +0,0 @@
-
-#include "metastats.h"
-
-//The following code has been modified using the original Metastats program from White, J.R., Nagarajan, N. & Pop, M. Statistical methods for detecting differentially abundant features in clinical metagenomic samples. PLoS Comput Biol 5, e1000352 (2009).
-
-int metastat_main (char* outputFileName, int numRows, int numCols, double threshold, int numPermutations, double** data, int secondGroupingStart){
-       
-       int size,c=0,i=0,j=0,counter=0, bflag=0; 
-       int B=numPermutations;
-       int row = numRows;
-       int col = numCols;
-       int g = secondGroupingStart;
-       double thresh=threshold;
-       double min=0; 
-       
-       char output[1024];
-       strcpy(output, outputFileName);
-       FILE *out;
-       
-       if (g>=col || g<=0){
-               printf("Check your g value\n"); 
-       }
-       
-       // Initialize the matrices
-       size = row*col;
-       double matrix[row][col];
-       double pmatrix[size],permuted[size];  
-       double storage[row][9];
-       
-       for (i=0;i<row;i++){
-               for (j =0;j<9;j++){
-                       storage[i][j]=0;                
-               }       
-       }
-       
-       for(i=0; i<row; i++){
-               for(j=0; j<col;j++){
-                       matrix[i][j]=data[i][j];
-                       pmatrix[c]=0; // initializing to zero
-                       permuted[c]=0;
-                       c++;
-               }
-       }
-       
-       // Produces the sum of each column
-       double total[col],total1=0,total2=0;
-       double ratio[col];
-       
-       for(i=0;i<col;i++){
-               total[i]=0;
-               ratio[i]=0; }
-       
-       for(i=0; i<col; i++){
-               for(j=0;j<row;j++){
-                       total[i]=total[i]+matrix[j][i];                 
-               }
-       }
-       
-       for(i=0;i<g-1;i++){
-               total1=total1+total[i];}
-       
-       for(i=g-1;i<col;i++){
-               total2=total2+total[i];}
-       
-       
-       // Creates the ratios by first finding the minimum of totals
-       min = total[0];
-       if (col==2){
-               if (total[0]<total[1]){
-                       min = total[1];}        
-       }
-       if (col >2){
-               for(i=1;i<col;i++){
-                       if (min > total[i]){
-                               min = total[i];}
-               }
-       }
-       if (min<=0){
-               printf("Error, the sum of one of the columns <= 0.");
-               return 0;       
-       }
-       
-       
-       // Ratio time...
-       for(i=0;i<col;i++){
-               ratio[i]=total[i]/min;
-       }
-       
-       //Change matrix into an array as received by R for compatibility.
-       
-       c=0;
-       for(i=0;i<col;i++){
-               for(j=0;j<row;j++){
-                       pmatrix[c]=matrix[j][i];
-                       c++;
-               }
-       }
-       
-       if(row == 1){
-               for (i =0; i<col;i++){
-                       pmatrix[i]=pmatrix[i]/ratio[i];
-               }
-       }
-       else {
-               counter = 0;
-               j=-1;
-               for (i=0; i<size; i++) {
-                       if (counter % row == 0) {
-                               j++;
-                       }
-                       pmatrix[i]=pmatrix[i]/ratio[j];
-                       counter++; 
-               }   
-       }
-       // pass everything to the rest of the code using pointers. then 
-       // write to output file. below pointers for most of the values are 
-       // created to send everything by reference.
-       
-       int ptt_size, *permutes,*nc,*nr,*gvalue;
-       
-       nc = &col;
-       nr = &row;
-       gvalue = &g;
-       
-       permutes = &B;
-       ptt_size = B*row;
-       
-       //changing ptt_size to row
-       double permuted_ttests[row], pvalues[row], tinitial[row];
-       
-       for(i=0;i<row;i++){
-               permuted_ttests[i]=0;}
-       
-       for(i=0;i<row;i++){
-               pvalues[i]=0;
-               tinitial[i]=0; }
-       
-       // Find the initial values for the matrix.
-       start(pmatrix,gvalue,nr,nc,tinitial,storage);
-       
-       // Start the calculations.
-       
-       if ( (col==2) || ((g-1)<8) || ((col-g+1) < 8) ){  
-               
-               double fish[row], fish2[row];
-               for(i=0;i<row;i++){
-                       fish[i]=0;
-                       fish2[i]=0;}
-               
-               for(i=0;i<row;i++){
-                       
-                       for(j=0;j<g-1;j++){
-                               fish[i]=fish[i]+matrix[i][j];
-                       }
-                       
-                       for(j=g-1;j<col;j++){ 
-                               fish2[i]=fish2[i]+matrix[i][j];
-                       }
-                       
-                       double  f11,f12,f21,f22;
-                       
-                       f11=fish[i];
-                       f12=fish2[i];
-                       
-                       f21=total1-f11;
-                       f22=total2-f12;
-                       
-                       double data[] = {f11, f12, f21, f22};
-                       
-                       // CONTINGENGCY TABLE:
-                       //   f11   f12
-                       //   f21   f22
-                       
-                       int *nr, *nc, *ldtabl, *work;
-                       int nrow=2, ncol=2, ldtable=2, workspace=100000;
-                       double *expect, *prc, *emin,*prt,*pre;
-                       double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
-                       
-                       nr = &nrow;
-                       nc = &ncol;
-                       ldtabl=&ldtable;
-                       work = &workspace;
-                       
-                       expect = &e;
-                       prc = &prc1;
-                       emin=&emin1;
-                       prt=&prt1;
-                       pre=&pre1;
-                       
-                       //MothurFisher fishtere;
-                       //double mothurFex = fishtere.fexact(f11, f12, f21, f22);
-                       
-                       fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
-                       
-                       if (*pre>.999999999){
-                               *pre=1;
-                       }
-                       
-                       //printf("feaxt = %f\t%f\t%f\t%f\t%f\t%f\n", *expect, *pre, f11, f12, f21, f22);
-                       storage[i][8] = *pre;
-                       pvalues[i]=*pre;
-               }
-       }
-       else{
-               
-               testp(permuted_ttests, permutes, permuted,pmatrix, nc, nr, gvalue,tinitial,pvalues);
-               
-               // Checks to make sure the matrix isn't sparse.
-               double sparse[row], sparse2[row];
-               for(i=0;i<row;i++){
-                       sparse[i]=0;
-                       sparse2[i]=0;}
-               
-               c=0;    
-               for(i=0;i<row;i++){
-                       
-                       for(j=0;j<g-1;j++){
-                               sparse[i]=sparse[i]+matrix[i][j];
-                       }
-                       
-                       if(sparse[i] < (double)(g-1)){
-                               c++;
-                       }
-                       for(j=g-1;j<col;j++){ // ?<= for col
-                               sparse2[i]=sparse2[i]+matrix[i][j];
-                       }
-                       
-                       if( (sparse2[i] <(double)(col-g+1))) {
-                               c++;
-                       }
-                       
-                       if (c==2){
-                               c=0;
-                               
-                               double  f11,f12,f21,f22;
-                               
-                               f11=sparse[i];
-                               sparse[i]=0;
-                               
-                               f12=sparse2[i];
-                               sparse2[i]=0;
-                               
-                               f21=total1-f11;
-                               f22=total2-f12;
-                               
-                               double data[] = {f11, f12, f21, f22};
-                               
-                               int *nr, *nc, *ldtabl, *work;
-                               int nrow=2, ncol=2, ldtable=2, workspace=10000000; // I added two zeros for larger data sets
-                               double *expect, *prc, *emin,*prt,*pre;
-                               double e=0, prc1=0, emin1=0, prt1=0, pre1=0;
-                               
-                               nr = &nrow;
-                               nc = &ncol;
-                               ldtabl=&ldtable;
-                               work = &workspace;
-                               
-                               expect = &e;
-                               prc = &prc1;
-                               emin=&emin1;
-                               prt=&prt1;
-                               pre=&pre1;
-                               
-                               fexact(nr,nc,data, ldtabl,expect,prc,emin,prt,pre,work);
-                               
-                               if (*pre>.999999999){
-                                       *pre=1;
-                               }
-                               storage[i][8] = *pre;
-                               pvalues[i]=*pre;
-                       }
-               }  
-               // End of else statement
-               bflag = 1;
-       }
-       
-       // Calculates the mean of counts (not normalized)
-       double temp[row][2];
-       
-       for(j=0;j<row;j++){
-               for(i=0;i<2;i++){
-                       temp[j][i]=0;
-               }
-       }
-       
-       for (j=0;j<row;j++){
-               for (i=1; i<=(g-1); i++){
-                       temp[j][0]=temp[j][0]+matrix[j][i-1];
-               }
-               temp[j][0]= (double) temp[j][0]/(g-1);
-               for(i=g;i<=col;i++){
-                       temp[j][1]=temp[j][1]+matrix[j][i-1];
-               }
-               temp[j][1]= (double) temp[j][1]/(col-g+1);
-       }
-       
-       for(i=0;i<row;i++){
-               storage[i][3]=temp[i][0];
-               storage[i][7]=temp[i][1];
-               storage[i][8]=pvalues[i];
-       }
-       
-       // BACKUP checks
-       
-       for (i=0;i<row;i++){
-               if(pvalues[i]<thresh){
-                       printf("Feature %d is significant, p = %.10lf \n",i+1,pvalues[i]);
-               }       
-       }
-       
-       // And now we write the files to a text file.
-       struct tm *local;
-       time_t t;
-       t = time(NULL);
-       local = localtime(&t);
-       
-       out = fopen(output,"w");
-       
-       fprintf(out,"Local time and date of test: %s\n", asctime(local));
-       fprintf(out,"# rows = %d, # col = %d, g = %d\n\n",row,col,g);
-       if (bflag == 1){
-               fprintf(out,"%d permutations\n\n",B);   
-       }
-       
-       //output column headings - not really sure... documentation labels 9 columns, there are 10 in the output file
-       //storage 0 = meanGroup1 - line 529, 1 = varGroup1 - line 532, 2 = err rate1 - line 534, 3 = mean of counts group1?? - line 291, 4 = meanGroup2 - line 536, 5 = varGroup2 - line 539, 6 = err rate2 - line 541, 7 = mean of counts group2?? - line 292, 8 = pvalues - line 293
-       fprintf(out, "OTU\tmean(group1)\tvariance(group1)\tstderr(group1)\tmean_of_counts(group1)\tmean(group2)\tvariance(group2)\tstderr(group2)\tmean_of_counts(group1)\tp-value\n");
-    
-       for(i=0; i<row; i++){
-               fprintf(out,"%d",(i+1));
-               
-               for(j=0; j<9;j++){
-                       fprintf(out,"\t%.12lf",storage[i][j]);
-               }
-               fprintf(out,"\n");
-       }  
-       
-       fprintf(out,"\n \n");
-       
-       // fclose(jobj);
-       fclose(out);
-       
-       return 0;
-}
-
-void testp(double *permuted_ttests,int *B,double *permuted,
-                  double *Imatrix,int *nc,int *nr,int *g,double *Tinitial,double 
-                  *ps) {
-       
-       double Tvalues[*nr];
-       int a, b, n, j;
-       
-       a = *B;
-       b = *nr;
-       n = a*b;
-       
-       double counter[b];
-       
-       for(j=0;j<b;j++){
-               counter[j]=0;
-       }    
-       
-       for (j=1; j<=*B; j++){
-               permute_matrix(Imatrix,nc,nr,permuted,g,Tvalues,Tinitial,counter);
-               // for(i=0;i<*nr;i++){
-               //   permuted_ttests[k]=fabs(Tvalues[i]);
-               //    k++;
-    }
-       
-       
-       for(j=0;j<*nr;j++){
-               ps[j]=((counter[j]+1)/(double)(a+1));
-       }
-}      
-
-void permute_matrix(double *Imatrix,int *nc,int *nr,double *permuted,
-                                       int *g,double *trial_ts,double *Tinitial,double *counter1){
-       
-       int i=0,j=0,n=0,a=0,b=0,f=0,c=0,k=0;
-       
-       a = *nr; // number of rows
-       b = *nc;
-       n = a*b;
-       
-       int y[b];
-       
-       for (i=1; i<=*nc; i++){
-               y[i-1] = i;
-       }
-       
-       permute_array(y, b); 
-       
-       for (i=0; i<*nc; i++){
-               f = y[i]; //column number
-               c=1;
-               c*=(f-1);
-               c*=a;
-               if (f == 1){
-                       c = 0;
-               } // starting value position in the Imatrix
-               for(j=1; j<=*nr; j++){
-                       permuted[k] = Imatrix[c];
-                       c++;
-                       k++;
-               }
-       }
-       
-       calc_twosample_ts(permuted,g,nc,nr,trial_ts,Tinitial,counter1);
-}
-
-void permute_array(int *array, int n) {
-       static int seeded = 0;
-       int i;
-       
-       if (! seeded) {
-               seeded = 1;
-               srand(time(NULL));
-       }
-       
-       for (i = 0; i < n; i++) {
-               int selection = rand() % (n - i);
-               int tmp = array[i + selection];
-               array[i + selection] = array[i];
-               array[i] = tmp;
-       }
-}
-
-void calc_twosample_ts(double *Pmatrix,int *g,int *nc,int *nr,
-                                          double *Ts,double *Tinitial,double *counter) {
-       int i,a;
-       a = *nr;
-       a*=4;
-       
-       double C1[*nr][3], C2[*nr][3], storage[a],tool[a];
-       double nrows,ncols,gvalue, xbardiff=0, denom=0;
-       
-       nrows = (double) *nr;
-       ncols = (double) *nc;
-       gvalue= (double) *g;
-       
-    meanvar(Pmatrix,g,nr,nc,storage);
-    for(i=0;i<=a-1;i++){
-               tool[i]=storage[i];
-    }
-    for (i=0; i<*nr;i++){
-               C1[i][0]=tool[i];
-               C1[i][1]=tool[i+*nr+*nr];
-               C1[i][2]=C1[i][1]/(gvalue-1);
-               
-               C2[i][0]=tool[i+*nr];
-               C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-               C2[i][2]=C2[i][1]/(ncols-gvalue+1);
-    }
-    
-    for (i=0; i<*nr; i++){
-               xbardiff = C1[i][0]-C2[i][0];
-               denom = sqrt(C1[i][2]+C2[i][2]);
-               Ts[i]=fabs(xbardiff/denom);
-               if (fabs(Ts[i])>(fabs(Tinitial[i])+.0000000000001)){ //13th place
-                       counter[i]++;
-               }
-    }  
-}
-
-void meanvar(double *pmatrix,int *g,int *nr,int *nc,double *store){
-       double temp[*nr], temp2[*nr],var[*nr],var2[*nr],a,b;
-       
-       int i,m,k,l,n;
-       
-       a = (double) *g-1;          
-       b = (double) (*nc-a);
-       
-       for (i = 0; i<*nr; i++){
-               temp[i]=0;
-               temp2[i]=0;
-               var[i]=0;
-               var2[i]=0;
-       }
-       
-       k = *nr; // number of rows 
-       l = *nc;
-       n = k*l;        
-       
-    m=0;
-    m=*g-1;
-    k=*nr;
-    m*=k; // m = g * nr now
-    for (i=0;i<m;i++){
-               temp[i%k]=temp[i%k]+pmatrix[i];
-    }
-    for (i=0;i<n;i++){
-               temp2[i%k]=temp2[i%k]+pmatrix[i];
-    }
-    for (i=0;i<*nr;i++){
-               temp2[i]=temp2[i]-temp[i];
-    }
-    for (i=0;i<=*nr-1;i++){
-               store[i]=temp[i]/a;
-               store[i+*nr]=temp2[i]/b;
-    }
-    
-    // That completes the mean calculations.
-    
-    for (i=0;i<m;i++){
-               var[i%k]=var[i%k]+pow((pmatrix[i]-store[i%k]),2);
-    }
-    for (i=m;i<n;i++){
-               var2[i%k]=var2[i%k]+pow((pmatrix[i]-store[(i%k)+*nr]),2);
-    }
-    
-    for (i=0;i<=*nr-1;i++){
-               store[i+2*k]=var[i]/(a-1);
-               store[i+3*k]=var2[i]/(b-1);
-    }
-    // That completes var calculations.
-}
-
-void start(double *Imatrix,int *g,int *nr,int *nc,double *initial,
-                  double storage[][9]){
-       int i, a = *nr;
-       a*=4;
-       
-       double store[a], tool[a], C1[*nr][3], C2[*nr][3];
-       double nrows,ncols,gvalue, xbardiff=0, denom=0;
-       
-       nrows = (double) *nr;
-       ncols = (double) *nc;
-       gvalue= (double) *g;
-       
-       meanvar(Imatrix,g,nr,nc,store);
-       
-       for(i=0;i<=a-1;i++){
-               tool[i]=store[i];
-       }
-       for (i=0; i<*nr;i++){
-               C1[i][0]=tool[i]; //mean group 1
-               storage[i][0]=C1[i][0];
-               C1[i][1]=tool[i+*nr+*nr]; // var group 1
-               storage[i][1]=C1[i][1];
-               C1[i][2]=C1[i][1]/(gvalue-1);
-               storage[i][2]=sqrt(C1[i][2]);
-               
-               C2[i][0]=tool[i+*nr]; // mean group 2
-               storage[i][4]=C2[i][0];    
-               C2[i][1]=tool[i+*nr+*nr+*nr]; // var group 2 
-               storage[i][5]=C2[i][1];        
-               C2[i][2]=C2[i][1]/(ncols-gvalue+1);
-               storage[i][6]=sqrt(C2[i][2]);   
-       }
-       for (i=0; i<*nr; i++){
-               xbardiff = C1[i][0]-C2[i][0];
-               denom = sqrt(C1[i][2]+C2[i][2]);
-               initial[i]=fabs(xbardiff/denom);
-       }                                                                                       
-}
-
-
-
-