]> git.donarmstrong.com Git - ape.git/blob - src/mvrs.c
various bug fixes since the release of ape 3.0
[ape.git] / src / mvrs.c
1 /* mvrs.c    2012-02-17 */
2
3 /* Copyright 2011-2012 Andrei-Alin Popescu */
4
5 /* This file is part of the R-package `ape'. */
6 /* See the file ../COPYING for licensing issues. */
7
8 #include "ape.h"
9
10 void mvrs(double *D, double* v,int *N, int *edge1, int *edge2, double *edge_length,int* fsS)
11 {       //assume missing values are denoted by -1
12
13         double *S,*R ,*new_v, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
14         int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
15         /*for(i=0;i<n*(n-1)/2;i++)
16           {if(isNA(D[i])){D[i]=-1;}
17           }*/
18         int *s;//s contains |Sxy|, which is all we need for agglomeration
19         double *newR;
20         int *newS;
21         int fS=*fsS;
22
23         R = &Sdist;
24         new_dist = &Ndist;
25         otu_label = &o_l;
26         n = *N;
27         cur_nod = 2*n - 2;
28
29         R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
30         new_v = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
31         S = (double*)R_alloc(n + 1, sizeof(double));
32         newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
33         new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
34         otu_label = (int*)R_alloc(n + 1, sizeof(int));
35         s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
36         newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
37
38         for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
39
40         k = 0;
41         //compute Sxy and Rxy
42
43         for(i=0;i<n*(n-1)/2;i++)
44           {newR[i]=0;
45            newS[i]=0;
46            s[i]=0;
47            R[i]=0;
48           }
49
50         for(i=1;i<n;i++)
51          for(j=i+1;j<=n;j++)
52          {//algorithm assumes i,j /in Sij, so skip pair if it is not known
53           if(D[give_index(i,j,n)]==-1)
54             {
55               continue;
56             }
57           for(k=1;k<=n;k++)
58            {//ij is the pair for which we compute
59             //skip k if we do not know the distances between it and i AND j
60
61              if(k==i || k==j)
62                {
63                   s[give_index(i,j,n)]++;
64                   //Rprintf("%i",s[give_index(i,j,n)]);
65
66                   continue;
67                }
68               if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
69               //Rprintf("%i\n",k);
70               s[give_index(i,j,n)]++;
71               R[give_index(i,j,n)]+=D[give_index(i,k,n)];
72               R[give_index(i,j,n)]+=D[give_index(j,k,n)];
73               //Rprintf("%i",s[give_index(i,j,n)]);
74               //Rprintf("%f",R[give_index(i,j,n)]);
75
76            }
77          }
78
79         /*for(i=1;i<n;i++)
80                   {
81                     for(j=i+1;j<=n;j++)
82                       {
83                         Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
84                       }
85                     Rprintf("\n");
86                   }
87
88                 for(i=1;i<n;i++)
89                   {
90                     for(j=i+1;j<=n;j++)
91                       {
92                         Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
93                       }
94                     Rprintf("\n");
95                   }*/
96         k=0;
97         int sw=1;//if 1 then incomplete
98         while (n > 3) {
99
100                 ij = 0;
101                 for(i=1;i<n;i++)
102                  for(j=i+1;j<=n;j++)
103                   {newR[give_index(i,j,n)]=0;
104                    newS[give_index(i,j,n)]=0;
105                   }
106
107                 smallest_S = -1e50;
108                 if(sw==0)
109                     for(i=1;i<=n;i++)
110                        {S[i]=0;
111                        }
112
113                 B=n-2;
114                 if(sw==1)
115                      {
116                       choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
117                      }
118                  else{ //Rprintf("distance matrix is now complete\n");
119                         for (i=1;i<=n;i++)
120                          for(j=1;j<=n;j++)
121                            {if(i==j)continue;
122                            //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
123                            //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
124                              S[i]+=D[give_index(i,j,n)];
125                            }
126                         B=n-2;
127                         //Rprintf("n=%i,B=%f",n,B);
128                         for (i = 1; i < n; i++) {
129                          for (j = i + 1; j <= n; j++) {
130                                  //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
131                                 A=S[i]+S[j]-B*D[give_index(i,j,n)];
132                                 //Rprintf("Q[%i,%i]=%f\n",i,j,A);
133                                 if (A > smallest_S) {
134                                         OTU1 = i;
135                                         OTU2 = j;
136                                         smallest_S = A;
137                                         smallest = ij;
138                                 }
139                                 ij++;
140                         }
141                         }
142                      }
143                 if(s[give_index(OTU1,OTU2,n)]<=2)
144                   {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
145                   }
146                 //Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
147
148                 /*for(i=1;i<n;i++)
149                   {
150                     for(j=i+1;j<=n;j++)
151                       {
152                         Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
153                       }
154                     Rprintf("\n");
155                   }
156
157                 for(i=1;i<n;i++)
158                   {
159                     for(j=i+1;j<=n;j++)
160                       {
161                         Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
162                       }
163                     Rprintf("\n");
164                   }
165
166                 for(i=1;i<n;i++)
167                   {
168                     for(j=i+1;j<=n;j++)
169                       {
170                         Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
171                       }
172                     Rprintf("\n");
173                   }*/
174
175                 //update R and S, only if matrix still incomplete
176                 if(sw==1)
177                 for(i=1;i<n;i++)
178                 {if(i==OTU1 || i==OTU2)continue;
179                  for(j=i+1;j<=n;j++)
180                   {if(j==OTU1 || j==OTU2)continue;
181                     if(D[give_index(i,j,n)]==-1)continue;
182                      if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
183                       {//OTU1 was considered for Rij, so now subtract
184                        R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
185                        s[give_index(i,j,n)]--;
186                       }
187                      if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
188                       {//OTU2 was considered for Rij, so now subtract
189                        R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
190                        s[give_index(i,j,n)]--;
191                       }
192                   }
193                 }
194
195                 edge2[k] = otu_label[OTU1];
196                 edge2[k + 1] = otu_label[OTU2];
197                 edge1[k] = edge1[k + 1] = cur_nod;
198
199                 double miu=0;
200                 double miuSum=0;
201                 for(i=1;i<=n;i++)
202                  {
203                    if(i == OTU1 || i==OTU2)continue;
204                    if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
205                    //Rprintf("index(%i,%i)=%i index(%i,%i)=%i",i,OTU1,give_index(i,OTU1,n),i,OTU2,give_index(i,OTU2,n));
206                    miuSum+=(1/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]));
207                  }
208                 miuSum=1/miuSum;
209                 miu=miuSum/2;
210
211                 double eLenSum=0;
212                 for(i=1;i<=n;i++)
213                  {
214                    if(i == OTU1 || i==OTU2)continue;
215                    if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
216                    double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
217                    eLenSum+=wi*(D[give_index(i,OTU1,n)]-D[give_index(i,OTU2,n)]);
218                  }
219
220                 edge_length[k]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
221
222                 eLenSum=0;
223                 for(i=1;i<=n;i++)
224                  {
225                    if(i == OTU1 || i==OTU2)continue;
226                    if(D[give_index(i,OTU1,n)]==-1 || D[give_index(i,OTU2,n)]==-1)continue;
227                    double wi=miu/(v[give_index(i,OTU1,n)]+v[give_index(i,OTU2,n)]);
228                    eLenSum+=wi*(D[give_index(i,OTU2,n)]-D[give_index(i,OTU1,n)]);
229                  }
230
231                 edge_length[k+1]=D[give_index(OTU1,OTU2,n)]/2 + eLenSum;
232
233                //no need to change distance matrix update for complete distance
234                //case, as pairs will automatically fall in the right cathegory
235
236                 //OTU1=x, OTU2=y from formulas
237                 A = D[give_index(OTU1,OTU2,n)];
238                 ij = 0;
239                 for (i = 1; i <= n; i++) {
240                         if (i == OTU1 || i == OTU2) continue;
241                         if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
242                          {  double lamb=v[give_index(i,OTU2,n)]/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
243                             new_dist[ij]= lamb*(D[give_index(OTU1,i,n)]-edge_length[k])+(1-lamb)*(D[give_index(OTU2,i,n)]-edge_length[k+1]);
244                             new_v[ij]=(v[give_index(i,OTU2,n)]*v[give_index(i,OTU1,n)])/(v[give_index(i,OTU2,n)]+v[give_index(i,OTU1,n)]);
245                          }else{
246                          if(D[give_index(OTU1,i,n)]!=-1)
247                                 {
248                                  new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
249                                  new_v[ij]=v[give_index(OTU1,i,n)];
250                                 }else{
251                                       if(D[give_index(OTU2,i,n)]!=-1)
252                                         {
253                                             new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
254                                             new_v[ij]=v[give_index(OTU2,i,n)];
255                                         }else{new_dist[ij]=-1;new_v[ij]=-1;}
256                                      }
257                               }
258
259                         ij++;
260                 }
261
262                 for (i = 1; i < n; i++) {
263                         if (i == OTU1 || i == OTU2) continue;
264                         for (j = i + 1; j <= n; j++) {
265                                 if (j == OTU1 || j == OTU2) continue;
266                                 new_dist[ij] = D[DINDEX(i, j)];
267                                 new_v[ij]=v[give_index(i,j,n)];
268                                 ij++;
269                         }
270                 }
271
272                 /*for(i=1;i<n-1;i++)
273                 {
274                   for(j=i+1;j<=n-1;j++)
275                    {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
276                    }
277                   Rprintf("\n");
278                 }*/
279                 //compute Rui, only if distance matrix is still incomplete
280                 ij=0;
281                 if(sw==1)
282                 for(i=2;i<n;i++)
283                   {
284                    ij++;
285                    if(new_dist[give_index(i,1,n-1)]==-1)continue;
286
287                    for(j=1;j<n;j++)
288                      {
289                        if(j==1 || j==i)
290                        {
291                          newS[give_index(1,i,n-1)]++;
292                          continue;
293                        }
294                        if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
295                         {
296                           newS[give_index(1,i,n-1)]++;
297                           newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
298                           newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
299                         }
300                      }
301                   }
302                 //fill in the rest of R and S, again only if distance matrix still
303                 //incomplete
304                 if(sw==1)
305                 for(i=1;i<n;i++)
306                 {if(i==OTU1 || i==OTU2)continue;
307                  for(j=i+1;j<=n;j++)
308                   {if(j==OTU1 || j==OTU2)continue;
309                    newR[ij]=R[give_index(i,j,n)];
310                    newS[ij]=s[give_index(i,j,n)];
311                    ij++;
312                   }
313                 }
314                 //update newR and newS with the new taxa, again only if distance
315                 //matrix is still incomplete
316                 if(sw==1)
317                 for(i=2;i<n-1;i++)
318                 {if(new_dist[give_index(1,i,n-1)]==-1)continue;
319                  for(j=i+1;j<=n-1;j++)
320                   {if(new_dist[give_index(1,j,n-1)]==-1)continue;
321                    newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
322                    newS[give_index(i,j,n-1)]++;
323                   }
324                 }
325                 /* compute the branch lengths */
326
327
328
329                 /* update before the next loop
330                    (we are sure that OTU1 < OTU2) */
331                 if (OTU1 != 1)
332                         for (i = OTU1; i > 1; i--)
333                                 otu_label[i] = otu_label[i - 1];
334                 if (OTU2 != n)
335                         for (i = OTU2; i < n; i++)
336                                 otu_label[i] = otu_label[i + 1];
337                 otu_label[1] = cur_nod;
338
339
340
341                 n--;
342                 for (i = 0; i < n*(n - 1)/2; i++)
343                   {
344                     D[i] = new_dist[i];
345                     v[i] = new_v[i];
346                     if(sw==1)
347                        {
348                         R[i] = newR[i];
349                         s[i] = newS[i];
350                        }
351                   }
352                 cur_nod--;
353                 k = k + 2;
354         }
355         int dK=0;//number of known distances in final distance matrix
356         int iUK=-1;//index of unkown distance, if we have one missing distance
357         int iK=-1;//index of only known distance, only needed if dK==1
358         for (i = 0; i < 3; i++) {
359                 edge1[*N*2 - 4 - i] = cur_nod;
360                 edge2[*N*2 - 4 - i] = otu_label[i + 1];
361                 if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
362         }
363         if(dK==2)
364          {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
365           //and edge weights of three edges are a,b,c, then any b,c>0 that
366           //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
367           //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
368           //algebra we get that we can set the missing distance equal to the
369           //maximum of the already present distances
370             double max=-1e50;
371           for(i=0;i<3;i++)
372             {if(i==iUK)continue;
373              if(D[i]>max)max=D[i];
374             }
375           D[iUK]=max;
376          }
377         if(dK==1)
378          {//through similar motivation as above, if we have just one known distance
379           //we set the other two distances equal to it
380           for(i=0;i<3;i++)
381             {if(i==iK)continue;
382              D[i]=D[iK];
383             }
384          }
385         if(dK==0)
386          {//no distances are known, we just set them to 1
387           for(i=0;i<3;i++)
388            {D[i]=1;
389            }
390          }
391         edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
392         edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
393         edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
394 }