3 cl -O2 -Ot -W4 multiruns.c
\r
4 cc -o multiruns -O2 multiruns.c -lm
\r
6 multiruns <rstfile1> <rstfile2> ... <lnLColumn>
\r
8 Examples (comparing three runs with lnL in column 19 in rst1):
\r
10 multiruns rst1.r1 rst1.r2 rst1.r3 19
\r
11 multiruns a/rst1 b/rst1 c/rst1 19
\r
13 March 2003, Ziheng Yang
\r
14 September 2005, changed tworuns into multiruns, ziheng yang
\r
16 This program compares outputs from multiple separate ML runs analyzing
\r
17 many data sets (using ndata) to assemble a result file. Because of local
\r
18 peaks and convergence problems, multiple runs for the same analysis may not
\r
19 generate the same results. Then we should use the results corresponding to
\r
20 the highest lnL. This program takes input files which have summary results
\r
21 from multiple runs, one line for each data set. The program takes one line
\r
22 from each of the input files and compare the first field, which is an index
\r
23 column and should be identical between the input files, and an lnL column.
\r
24 The program decides which run generated the highest lnL, and copy the line
\r
25 from that run into the output file: out.txt.
\r
27 This is useful when you analyze the same set of simulated replicate data
\r
28 sets multiple times, using different starting values. For example, codeml
\r
29 may write a line of output in rst1 for each data set, including parameter
\r
30 estimates and lnL. You can then use this program to compare the rst1 output
\r
31 files from multiple runs to generate one output file. The program allows the
\r
32 fields to be either numerical or text, but the first (index) and lnL columns
\r
33 should be numerical.
\r
42 #define MAXNFIILES 20
\r
43 #define MAXNFIELDS 1000
\r
44 #define MAXLLINE 64000
\r
46 void error2 (char * message);
\r
47 FILE *gfopen(char *filename, char *mode);
\r
48 int splitline (char line[], int fields[]);
\r
51 int main(int argc, char* argv[])
\r
53 FILE *fout, *fin[MAXNFIILES];
\r
54 char infile[MAXNFIILES][96]={"rst1.r1", "rst1.r2"}, outfile[96]="out.txt";
\r
55 int index=0, nfile, nfileread, lnLcolumn=13, i, nrecords=0, lline=MAXLLINE;
\r
56 int nfields[MAXNFIILES],fields[MAXNFIELDS], minf, maxf, miss[MAXNFIILES];
\r
57 char *line[MAXNFIILES];
\r
58 double lnL[MAXNFIILES], lnLmin, lnLmax, indexfield[MAXNFIILES], y;
\r
60 puts("Usage: \n\tmultiruns <file1> <file2> ... <lnLcolumn>\n");
\r
62 if(nfile<2) exit(1);
\r
64 for(i=0; i<nfile; i++) {
\r
65 strcpy(infile[i], argv[1+i]);
\r
66 fin[i]=gfopen(infile[i],"r");
\r
67 if((line[i]=(char*)malloc(MAXLLINE*sizeof(char)))==NULL) error2("oom");
\r
68 printf("%s ", infile[i]);
\r
70 printf(" ==> %s\n\n", outfile);
\r
72 sscanf(argv[argc-1], "%d", &lnLcolumn);
\r
74 fout=(FILE*)gfopen(outfile,"w");
\r
76 for(nrecords=0; ; nrecords++) {
\r
77 for(i=0,nfileread=0; i<nfile; i++) {
\r
78 nfields[i]=0; lnL[i]=0; line[i][0]='\0'; miss[i]=1;
\r
79 if(!fgets(line[i], lline, fin[i])) continue;
\r
80 nfields[i]=splitline(line[i],fields);
\r
82 if(nfields[i]>index) sscanf(line[i]+fields[index], "%lf", &indexfield[i]);
\r
83 if(nfields[i]>lnLcolumn) {
\r
84 sscanf(line[i]+fields[lnLcolumn], "%lf", &lnL[i]);
\r
89 if(nfileread==0) break;
\r
90 for(i=0,y=-1; i<nfile; i++) {
\r
92 if(y==-1) y=indexfield[i];
\r
93 else if(y!=indexfield[i]) error2("index field different");
\r
96 for(i=0,lnLmin=1e300,lnLmax=-1e300; i<nfile; i++) {
\r
97 if(miss[i]) continue;
\r
98 if(lnL[i]<lnLmin) { lnLmin=lnL[i]; minf=i; }
\r
99 if(lnL[i]>lnLmax) { lnLmax=lnL[i]; maxf=i; }
\r
102 fprintf(fout, "%s", line[maxf]);
\r
104 printf("record %4d (", nrecords+1);
\r
105 for(i=0; i<nfile; i++) printf("%c", (miss[i]?'-':'+'));
\r
106 printf(") %10.3f (%d) - %10.3f (%d) = %8.3f\n",
\r
107 lnLmin, minf+1, lnLmax, maxf+1, lnLmin-lnLmax);
\r
109 printf("\nwrote %d records into %s\n", nrecords, outfile);
\r
110 for(i=0; i<nfile; i++) { fclose(fin[i]); free(line[i]); }
\r
114 void error2 (char * message)
\r
115 { printf("\nError: %s.\n", message); exit(-1); }
\r
117 FILE *gfopen(char *filename, char *mode)
\r
121 if(filename==NULL || filename[0]==0)
\r
122 error2("file name empty.");
\r
124 fp=(FILE*)fopen(filename, mode);
\r
126 printf("\nerror when opening file %s\n", filename);
\r
127 if(!strchr(mode,'r')) exit(-1);
\r
128 printf("tell me the full path-name of the file? ");
\r
129 scanf("%s", filename);
\r
130 if((fp=(FILE*)fopen(filename, mode))!=NULL) return(fp);
\r
131 puts("Can't find the file. I give up.");
\r
137 int splitline (char line[], int fields[])
\r
139 /* This finds out how many fields there are in the line, and marks the starting
\r
140 positions of the fields.
\r
141 Fields are separated by spaces, and texts are allowed as well.
\r
143 int lline=64000, i, nfields=0, InSpace=1;
\r
146 for(i=0; i<lline && *p && *p!='\n'; i++,p++) {
\r
152 fields[nfields++]=i;
\r
153 /* if(nfields>MAXNFIELDS) puts("raise MAXNFIELDS?"); */
\r