1 /* mvrs.c 2012-04-02 */
3 /* Copyright 2011-2012 Andrei-Alin Popescu */
5 /* This file is part of the R-package `ape'. */
6 /* See the file ../COPYING for licensing issues. */
10 void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_length,int* fsS)
11 { //assume missing values are denoted by -1
13 double *S,*R ,*new_v, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
14 int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
15 /*for(i=0;i<n*(n-1)/2;i++)
16 {if(isNA(D[i])){D[i]=-1;}
18 int *s;//s contains |Sxy|, which is all we need for agglomeration
29 R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
30 new_v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
31 S = (double*)R_alloc(n + 1, sizeof(double));
32 newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
33 new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
34 otu_label = (int*)R_alloc(n + 1, sizeof(int));
35 s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
36 newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
38 for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
43 for(i=0;i<n*(n-1)/2;i++)
52 {//algorithm assumes i,j /in Sij, so skip pair if it is not known
53 if(D[give_index(i,j,n)]==-1)
58 {//ij is the pair for which we compute
59 //skip k if we do not know the distances between it and i AND j
63 if(i!=k)R[give_index(i,j,n)]+=D[give_index(i,k,n)];
64 if(j!=k)R[give_index(i,j,n)]+=D[give_index(j,k,n)];
65 s[give_index(i,j,n)]++;
66 //Rprintf("%i",s[give_index(i,j,n)]);
70 if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
72 s[give_index(i,j,n)]++;
73 R[give_index(i,j,n)]+=D[give_index(i,k,n)];
74 R[give_index(i,j,n)]+=D[give_index(j,k,n)];
75 //Rprintf("%i",s[give_index(i,j,n)]);
76 //Rprintf("%f",R[give_index(i,j,n)]);
85 Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
94 Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
99 int sw=1;//if 1 then incomplete
105 {newR[give_index(i,j,n)]=0;
106 newS[give_index(i,j,n)]=0;
118 choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
120 else{ //Rprintf("distance matrix is now complete\n");
124 //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
125 //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
126 S[i]+=D[give_index(i,j,n)];
129 //Rprintf("n=%i,B=%f",n,B);
130 for (i = 1; i < n; i++) {
131 for (j = i + 1; j <= n; j++) {
132 //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
133 A=S[i]+S[j]-B*D[give_index(i,j,n)];
134 //Rprintf("Q[%i,%i]=%f\n",i,j,A);
135 if (A > smallest_S) {
145 if(s[give_index(OTU1,OTU2,n)]<=2)
146 {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
148 //Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
154 Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
163 Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
172 Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
177 //update R and S, only if matrix still incomplete
180 {if(i==OTU1 || i==OTU2)continue;
182 {if(j==OTU1 || j==OTU2)continue;
183 if(D[give_index(i,j,n)]==-1)continue;
184 if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
185 {//OTU1 was considered for Rij, so now subtract
186 R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
187 s[give_index(i,j,n)]--;
189 if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
190 {//OTU2 was considered for Rij, so now subtract
191 R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
192 s[give_index(i,j,n)]--;
197 edge2[k] = otu_label[OTU1];
198 edge2[k + 1] = otu_label[OTU2];
199 edge1[k] = edge1[k + 1] = cur_nod;
205 if(i == OTU1 || i==OTU2)continue;
206 if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
207 //Rprintf("index(%i,%i)=%i index(%i,%i)=%i",i,OTU1,give_index(i,OTU1,n),i,OTU2,give_index(i,OTU2,n));
208 miuSum+=(1/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]));
216 if(i == OTU1 || i==OTU2)continue;
217 if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
218 double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
219 eLenSum+=wi*(D[give_index(i,OTU1,n)]-D[give_index(i,OTU2,n)]);
222 edge_length[k]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
227 if(i == OTU1 || i==OTU2)continue;
228 if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
229 double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
230 eLenSum+=wi*(D[give_index(i,OTU2,n)]-D[give_index(i,OTU1,n)]);
233 edge_length[k+1]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
235 //no need to change distance matrix update for complete distance
236 //case, as pairs will automatically fall in the right cathegory
238 //OTU1=x, OTU2=y from formulas
239 A = D[give_index(OTU1,OTU2,n)];
241 for (i = 1; i <= n; i++) {
242 if (i == OTU1 || i == OTU2) continue;
243 if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
244 { double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
245 new_dist[ij]= lamb*(D[give_index(OTU1,i,n)]-edge_length[k])+(1-lamb)*(D[give_index(OTU2,i,n)]-edge_length[k+1]);
246 new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
248 if(D[give_index(OTU1,i,n)]!=-1)
250 new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
251 new_v[ij]=v[give_index(OTU1,i,n)];
253 if(D[give_index(OTU2,i,n)]!=-1)
255 new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
256 new_v[ij]=v[give_index(OTU2,i,n)];
257 }else{new_dist[ij]=-1;new_v[ij]=-1;}
264 for (i = 1; i < n; i++) {
265 if (i == OTU1 || i == OTU2) continue;
266 for (j = i + 1; j <= n; j++) {
267 if (j == OTU1 || j == OTU2) continue;
268 new_dist[ij] = D[DINDEX(i, j)];
269 new_v[ij]=v[give_index(i,j,n)];
276 for(j=i+1;j<=n-1;j++)
277 {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
281 //compute Rui, only if distance matrix is still incomplete
287 if(new_dist[give_index(i,1,n-1)]==-1)continue;
293 if(i!=j)newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
294 if(1!=j)newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
295 newS[give_index(1,i,n-1)]++;
298 if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
300 newS[give_index(1,i,n-1)]++;
301 newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
302 newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
306 //fill in the rest of R and S, again only if distance matrix still
310 {if(i==OTU1 || i==OTU2)continue;
312 {if(j==OTU1 || j==OTU2)continue;
313 newR[ij]=R[give_index(i,j,n)];
314 newS[ij]=s[give_index(i,j,n)];
318 //update newR and newS with the new taxa, again only if distance
319 //matrix is still incomplete
322 {if(new_dist[give_index(1,i,n-1)]==-1)continue;
323 for(j=i+1;j<=n-1;j++)
324 {if(new_dist[give_index(1,j,n-1)]==-1)continue;
325 if(new_dist[give_index(i,j,n-1)]==-1)continue;
326 newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
327 newS[give_index(i,j,n-1)]++;
330 /* compute the branch lengths */
334 /* update before the next loop
335 (we are sure that OTU1 < OTU2) */
337 for (i = OTU1; i > 1; i--)
338 otu_label[i] = otu_label[i - 1];
340 for (i = OTU2; i < n; i++)
341 otu_label[i] = otu_label[i + 1];
342 otu_label[1] = cur_nod;
347 for (i = 0; i < n*(n - 1)/2; i++)
360 int dK=0;//number of known distances in final distance matrix
361 int iUK=-1;//index of unkown distance, if we have one missing distance
362 int iK=-1;//index of only known distance, only needed if dK==1
363 for (i = 0; i < 3; i++) {
364 edge1[*N*2 - 4 - i] = cur_nod;
365 edge2[*N*2 - 4 - i] = otu_label[i + 1];
366 if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
369 {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
370 //and edge weights of three edges are a,b,c, then any b,c>0 that
371 //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
372 //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
373 //algebra we get that we can set the missing distance equal to the
374 //maximum of the already present distances
378 if(D[i]>max)max=D[i];
383 {//through similar motivation as above, if we have just one known distance
384 //we set the other two distances equal to it
391 {//no distances are known, we just set them to 1
396 edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
397 edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
398 edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;