]> git.donarmstrong.com Git - ape.git/blob - src/njs.c
final commit for ape 3.0-8
[ape.git] / src / njs.c
1 /* njs.c    2012-04-02 */
2
3 /* Copyright 2011-2012 Andrei-Alin Popescu */
4
5 /* This file is part of the R-package `ape'. */
6 /* See the file ../COPYING for licensing issues. */
7
8 #include "ape.h"
9
10 int H(double t)
11 {
12         if (t >= 0 - 1e-10) return 1;
13         return 0;
14 }
15
16 void choosePair(double* D,int n,double* R,int* s, int* sw, int* x, int* y,int fS)
17 {
18     int i=0,j=0,k=0;
19     int sww=0;
20     double cFS[fS];
21     int iFS[fS];
22     int jFS[fS];
23     for(k=0;k<fS;k++)
24               {cFS[k]=-1e50;
25                iFS[k]=0;
26                jFS[k]=0;
27               }
28     double max=-1e50;
29     for (i = 1; i < n; i++)
30       {
31        for (j = i + 1; j <= n; j++)
32          {if(D[give_index(i,j,n)]==-1){sww=1;continue;}
33            if(s[give_index(i,j,n)]<=2)continue;
34            //Rprintf("R[%i,%i]=%f\n",i,j,R[give_index(i,j,n)]);
35            //Rprintf("s[%i,%i]=%i\n",i,j,s[give_index(i,j,n)]);
36            //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
37            int tr=0;
38             double numb=((R[give_index(i,j,n)])/(s[give_index(i,j,n)]-2))-D[give_index(i,j,n)];
39            //Rprintf("numb(%i,%i)=%f\n",i,j,numb);
40            for(k=0;k<fS, cFS[k]>numb;k++);
41            //Rprintf("k=%i ",k);
42            for(tr=fS-1;tr>k;tr--)
43              {cFS[tr]=cFS[tr-1];
44               iFS[tr]=iFS[tr-1];
45               jFS[tr]=jFS[tr-1];
46              }
47
48             if(k<fS){cFS[k]=numb;
49                      iFS[k]=i;
50                      jFS[k]=j;
51                     }
52           }
53       }
54
55     //no missing distances, just return the one with maximal Q-criterion
56    if(sww==0){*x=iFS[0];*y=jFS[0];*sw=0;return;
57              }
58    /*for(k=0;k<fS;k++)
59               {Rprintf("value[%i]=%f ",k,cFS[k]);
60                Rprintf("i=%i ",iFS[k]);
61                Rprintf("j=%i ",jFS[k]);
62                Rprintf("\n");
63               }*/
64   //calculate N*(x,y)
65   for(i=0;i<fS;i++)
66    {
67     if(iFS[i]==0 || jFS[i]==0)continue;
68     double nb=nxy(iFS[i],jFS[i],n,D);
69     if(nb>max){max=nb;}
70     cFS[i]=nb;
71    }
72   /*Rprintf("all N*(x,y)\n");
73   for(k=0;k<fS;k++)
74               {Rprintf("value[%i]=%f ",k,cFS[k]);
75                Rprintf("i=%i ",iFS[k]);
76                Rprintf("j=%i ",jFS[k]);
77                Rprintf("\n");
78               }*/
79   int dk=0;
80   //shift the max N*xy to the front of the array
81   for(i=0;i<fS;i++)
82    {
83       if(cFS[i]==max)
84         {
85           cFS[dk]=cFS[i];
86           iFS[dk]=iFS[i];
87           jFS[dk]=jFS[i];
88           dk++;
89         }
90    }
91   //if just one pair realises max N*xy, return it
92   if(dk==1){*x=iFS[0];*y=jFS[0];return;}
93   fS=dk;
94  /*Rprintf("max n*xy values\n");
95  for(k=0;k<fS;k++)
96               {Rprintf("value[%i]=%f ",k,cFS[k]);
97                Rprintf("i=%i ",iFS[k]);
98                Rprintf("j=%i ",jFS[k]);
99                Rprintf("\n");
100               }*/
101  max=-1e50;
102  //on the front of the array containing max N*xy values compute cxy
103  for(i=0;i<fS;i++)
104   {
105     if(iFS[i]==0 || jFS[i]==0)continue;
106     double nb=cxy(iFS[i],jFS[i],n,D);
107     if(nb>max){max=nb;}
108     cFS[i]=nb;
109   }
110  /*Rprintf("all c*xy values\n");
111  for(k=0;k<fS;k++)
112               {Rprintf("value[%i]=%f ",k,cFS[k]);
113                Rprintf("i=%i ",iFS[k]);
114                Rprintf("j=%i ",jFS[k]);
115                Rprintf("\n");
116               }*/
117   //and again shift maximal C*xy values at the fron of the array
118   dk=0;
119   for(i=0;i<fS;i++)
120    {
121       if(cFS[i]==max)
122         {
123           cFS[dk]=cFS[i];
124           iFS[dk]=iFS[i];
125           jFS[dk]=jFS[i];
126           dk++;
127         }
128    }
129  //if just one C*xy with maximal value, return pair realising it
130  if(dk==1){*x=iFS[0];*y=jFS[0];return;}
131  fS=dk;
132  /*Rprintf("max c*xy values\n");
133  for(k=0;k<fS;k++)
134               {Rprintf("value[%i]=%f ",k,cFS[k]);
135                Rprintf("i=%i ",iFS[k]);
136                Rprintf("j=%i ",jFS[k]);
137                Rprintf("\n");
138               }*/
139  max=-1e50;
140  //on the front of the array containing max C*xy compute m*xy
141  for(i=0;i<fS;i++)
142   {
143     if(iFS[i]==0 || jFS[i]==0)continue;
144     double nb=mxy(iFS[i],jFS[i],n,D);
145     if(nb>max){max=nb;}
146     cFS[i]=nb;
147   }
148  /*Rprintf("all m*xy values\n");
149  for(k=0;k<fS;k++)
150               {Rprintf("value[%i]=%f ",k,cFS[k]);
151                Rprintf("i=%i ",iFS[k]);
152                Rprintf("j=%i ",jFS[k]);
153                Rprintf("\n");
154               }*/
155   //again shift maximal m*xy values to the fron of the array
156   dk=0;
157   for(i=0;i<fS;i++)
158    {
159       if(cFS[i]==max)
160         {
161           cFS[dk]=cFS[i];
162           iFS[dk]=iFS[i];
163           jFS[dk]=jFS[i];
164           dk++;
165         }
166    }
167  //if just one maximal value for m*xy return the pair realising it, found at 0
168  if(dk==1){*x=iFS[0];*y=jFS[0];return;}
169  fS=dk;
170  /*Rprintf("max m*xy values\n");
171  for(k=0;k<fS;k++)
172               {Rprintf("value[%i]=%f ",k,cFS[k]);
173                Rprintf("i=%i ",iFS[k]);
174                Rprintf("j=%i ",jFS[k]);
175                Rprintf("\n");
176               }*/
177  //and calculate cnxy on these values, but this time we do not shift, but simply
178  //return the pair realising the maximum, stored at iPos
179  max=-1e50;
180  int iPos=0;
181  for(i=0;i<fS;i++)
182   {
183     if(iFS[i]==0 || jFS[i]==0)continue;
184     double nb=cnxy(iFS[i],jFS[i],n,D);
185     if(nb>max){max=nb;iPos=i;}
186     cFS[i]=nb;
187   }
188  /*Rprintf("cN*xy\n");
189  Rprintf("value[%i]=%f ",0,cFS[0]);
190  Rprintf("i=%i ",iFS[0]);
191  Rprintf("j=%i ",jFS[0]);
192  Rprintf("\n");*/
193  if(iFS[iPos]==0 || jFS[iPos]==0)
194    {
195      error("distance information insufficient to construct a tree, cannot calculate agglomeration criterion");
196    }
197  *x=iFS[iPos];*y=jFS[iPos];
198 }
199
200 double cnxy(int x, int y, int n,double* D)
201 {
202     int i=0;
203     int j=0;
204     double nMeanXY=0;
205     //Rprintf("cN[%i,%i]\n",x,y);
206     for(i=1;i<=n;i++)
207      {
208       for(j=1;j<=n;j++)
209       {if(i==j)continue;
210        if((i==x && j==y) || (j==x && i==y))continue;
211        double n1=0;
212        double n2=0;
213        if(i!=x)n1=D[give_index(i,x,n)];
214        if(j!=y)n2=D[give_index(j,y,n)];
215        if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
216        nMeanXY+=(n1+n2-D[give_index(x,y,n)]-D[give_index(i,j,n)]);
217        //Rprintf("cnMeanXY after (%i,%i)=%f\n",i,j,nMeanXY);
218       }
219      }
220     return nMeanXY;
221 }
222
223 int mxy(int x,int y,int n,double* D)
224 {
225     int i=0;
226     int mx[n+1];
227     int my[n+1];
228     for(i=1;i<=n;i++)
229       {
230         mx[i]=0;my[i]=0;
231       }
232     for(i=1;i<=n;i++)
233       {
234         if(i!=x && D[give_index(x,i,n)]==-1)
235           {
236             mx[i]=1;
237           }
238         if(i!=y && D[give_index(y,i,n)]==-1)
239           {
240             my[i]=1;
241           }
242       }
243     /*for(i=1;i<=n;i++)
244       {
245         Rprintf("mx[%i]=%i ",i,mx[i]);
246       }
247     Rprintf("\n");
248
249     for(i=1;i<=n;i++)
250       {
251         Rprintf("my[%i]=%i ",i,my[i]);
252       }
253     Rprintf("\n");*/
254
255     int xmy=0;
256     int ymx=0;
257     for(i=1;i<=n;i++)
258       {
259         if(i!=x && mx[i]==1 && my[i]==0)
260           {
261             xmy++;
262           }
263         if(i!=y && my[i]==1 && mx[i]==0)
264           {
265             ymx++;
266           }
267       }
268     //Rprintf("xmy=%i, ymx=%i, xmy+ymx=%i\n",xmy,ymx,xmy+ymx);
269     return xmy+ymx;
270 }
271 double nxy(int x, int y, int n,double* D)
272 {
273     int sCXY=0;
274     int i=0;
275     int j=0;
276     double nMeanXY=0;
277     //Rprintf("N[%i,%i]\n",x,y);
278     for(i=1;i<=n;i++)
279      {
280       for(j=1;j<=n;j++)
281       {if(i==j)continue;
282        if((i==x && j==y) || (j==x && i==y))continue;
283        double n1=0;
284        double n2=0;
285        if(i!=x)n1=D[give_index(i,x,n)];
286        if(j!=y)n2=D[give_index(j,y,n)];
287        if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
288        sCXY++;
289        //Rprintf("considered pair (%i,%i)\n",i,j);
290        nMeanXY+=H(n1+n2-D[give_index(x,y,n)]-D[give_index(i,j,n)]);
291        //Rprintf("nMeanXY after (%i,%i)=%f\n",i,j,nMeanXY);
292       }
293      }
294     //Rprintf("sCXY=%i",sCXY);
295     if(sCXY==0) return 0;
296     return nMeanXY/sCXY;
297 }
298
299 int cxy(int x, int y, int n,double* D)
300 {
301     int sCXY=0;
302     int i=0;
303     int j=0;
304
305     for(i=1;i<=n;i++)
306      {
307       for(j=1;j<=n;j++)
308       {if(i==j)continue;
309        if((i==x && j==y) || (j==x && i==y))continue;
310        double n1=0;
311        double n2=0;
312        if(i!=x)n1=D[give_index(i,x,n)];
313        if(j!=y)n2=D[give_index(j,y,n)];
314        if(n1==-1 || n2==-1 || D[give_index(i,j,n)]==-1)continue;
315        sCXY++;
316       }
317      }
318     return sCXY;
319 }
320
321 void njs(double *D, int *N, int *edge1, int *edge2, double *edge_length,int *fsS)
322 {       //assume missing values are denoted by -1
323         double *S,*R, Sdist, Ndist, *new_dist, A, B, smallest_S, x, y;
324         int n, i, j, k, ij, smallest, OTU1, OTU2, cur_nod, o_l, *otu_label;
325         /*for(i=0;i<n*(n-1)/2;i++)
326           {if(isNA(D[i])){D[i]=-1;}
327           }*/
328         int *s;//s contains |Sxy|, which is all we need for agglomeration
329         double *newR;
330         int *newS;
331         int fS=*fsS;
332
333         R = &Sdist;
334         new_dist = &Ndist;
335         otu_label = &o_l;
336         n = *N;
337         cur_nod = 2*n - 2;
338
339         R = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
340         S = (double*)R_alloc(n + 1, sizeof(double));
341         newR = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
342         new_dist = (double*)R_alloc(n*(n - 1)/2, sizeof(double));
343         otu_label = (int*)R_alloc(n + 1, sizeof(int));
344         s = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
345         newS = (int*)R_alloc(n*(n - 1)/2, sizeof(int));
346
347         for (i = 1; i <= n; i++) otu_label[i] = i; /* otu_label[0] is not used */
348
349         k = 0;
350
351         //compute Sxy and Rxy
352         for(i=0;i<n*(n-1)/2;i++)
353           {newR[i]=0;
354            newS[i]=0;
355            s[i]=0;
356            R[i]=0;
357           }
358
359         for(i=1;i<n;i++)
360          for(j=i+1;j<=n;j++)
361          {//algorithm assumes i,j /in Sij, so skip pair if it is not known
362           if(D[give_index(i,j,n)]==-1)
363             {
364               continue;
365             }
366           //Rprintf("for %i and %i :\n",i,j);
367           for(k=1;k<=n;k++)
368            {//ij is the pair for which we compute
369             //skip k if we do not know the distances between it and i AND j
370               if(k==i || k==j)
371                {
372                   s[give_index(i,j,n)]++;
373                   if(i!=k)R[give_index(i,j,n)]+=D[give_index(i,k,n)];
374                   if(j!=k)R[give_index(i,j,n)]+=D[give_index(j,k,n)];
375                   continue;
376                }
377               if(D[give_index(i,k,n)]==-1 || D[give_index(j,k,n)]==-1)continue;
378               //Rprintf("%i\n",k);
379               s[give_index(i,j,n)]++;
380               R[give_index(i,j,n)]+=D[give_index(i,k,n)];
381               R[give_index(i,j,n)]+=D[give_index(j,k,n)];
382            }
383          }
384
385         /*for(i=1;i<n;i++)
386                   {
387                     for(j=i+1;j<=n;j++)
388                       {
389                         Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
390                       }
391                     Rprintf("\n");
392                   }
393
394                 for(i=1;i<n;i++)
395                   {
396                     for(j=i+1;j<=n;j++)
397                       {
398                         Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
399                       }
400                     Rprintf("\n");
401                   }*/
402
403         k=0;
404         int sw=1;//if 1 then incomplete
405         while (n > 3) {
406                 ij = 0;
407                 for(i=1;i<n;i++)
408                  for(j=i+1;j<=n;j++)
409                   {newR[give_index(i,j,n)]=0;
410                    newS[give_index(i,j,n)]=0;
411                   }
412                 smallest_S = -1e50;
413                 if(sw==0)
414                     for(i=1;i<=n;i++)
415                        {S[i]=0;
416                        }
417                 B=n-2;
418                 if(sw==1)
419                      {
420                       choosePair(D,n,R,s,&sw,&OTU1,&OTU2,fS);
421                      }
422                  else{ //Rprintf("distance matrix is now complete\n");
423                         for (i=1;i<=n;i++)
424                          for(j=1;j<=n;j++)
425                            {if(i==j)continue;
426                              //Rprintf("give_index(%i,%i)=%i\n",i,j,give_index(i,j,n));
427                              //Rprintf("D[%i,%i]=%f\n",i,j,D[give_index(i,j,n)]);
428                              S[i]+=D[give_index(i,j,n)];
429                            }
430                         B=n-2;
431                         //Rprintf("n=%i,B=%f",n,B);
432                         for (i = 1; i < n; i++) {
433                          for (j = i + 1; j <= n; j++) {
434                              //Rprintf("S[%i]=%f, S[%i]=%f, D[%i,%i]=%f, B=%f",i,S[i],j,S[j],i,j,D[give_index(i,j,n)],B);
435                                 A=S[i]+S[j]-B*D[give_index(i,j,n)];
436                                 //Rprintf("Q[%i,%i]=%f\n",i,j,A);
437                                 if (A > smallest_S) {
438                                         OTU1 = i;
439                                         OTU2 = j;
440                                         smallest_S = A;
441                                         smallest = ij;
442                                 }
443                                 ij++;
444                         }
445                         }
446                      }
447
448                 /*Rprintf("agglomerating %i and %i, Q=%f \n",OTU1,OTU2,smallest_S);
449
450                 for(i=1;i<n;i++)
451                   {
452                     for(j=i+1;j<=n;j++)
453                       {
454                         Rprintf("R[%i,%i]=%f ",i,j,R[give_index(i,j,n)]);
455                       }
456                     Rprintf("\n");
457                   }
458
459                 for(i=1;i<n;i++)
460                   {
461                     for(j=i+1;j<=n;j++)
462                       {
463                         Rprintf("s[%i,%i]=%i ",i,j,s[give_index(i,j,n)]);
464                       }
465                     Rprintf("\n");
466                   }
467
468                 for(i=1;i<n;i++)
469                   {
470                     for(j=i+1;j<=n;j++)
471                       {
472                         Rprintf("d[%i,%i]=%f ",i,j,D[give_index(i,j,n)]);
473                       }
474                     Rprintf("\n");
475                   }*/
476                 //update Rxy and Sxy, only if matrix still incomplete
477                 if(sw==1)
478                 for(i=1;i<n;i++)
479                 {if(i==OTU1 || i==OTU2)continue;
480                  for(j=i+1;j<=n;j++)
481                   {if(j==OTU1 || j==OTU2)continue;
482                     if(D[give_index(i,j,n)]==-1)continue;
483                      if(D[give_index(i,OTU1,n)]!=-1 && D[give_index(j,OTU1,n)]!=-1)
484                       {//OTU1 was considered for Rij, so now subtract
485                        R[give_index(i,j,n)]-=(D[give_index(i,OTU1,n)]+D[give_index(j,OTU1,n)]);
486                        s[give_index(i,j,n)]--;
487                       }
488                      if(D[give_index(i,OTU2,n)]!=-1 && D[give_index(j,OTU2,n)]!=-1)
489                       {//OTU2 was considered for Rij, so now subtract
490                        R[give_index(i,j,n)]-=(D[give_index(i,OTU2,n)]+D[give_index(j,OTU2,n)]);
491                        s[give_index(i,j,n)]--;
492                       }
493                   }
494                 }
495
496                 edge2[k] = otu_label[OTU1];
497                 edge2[k + 1] = otu_label[OTU2];
498                 edge1[k] = edge1[k + 1] = cur_nod;
499
500                 /* get the distances between all OTUs but the 2 selected ones
501                    and the latter:
502                    a) get the sum for both
503                    b) compute the distances for the new OTU */
504
505                 double sum=0;
506
507                 for(i=1;i<=n;i++)
508                  {if(i==OTU1 || i==OTU2)continue;
509                  if(D[give_index(OTU1,i,n)]==-1 || D[give_index(OTU2,i,n)]==-1)continue;
510                  sum+=(D[give_index(OTU1,i,n)]-D[give_index(OTU2,i,n)]);
511                  }
512                 //although s was updated above, s[otu1,otu2] has remained unchanged
513                 //so it is safe to use it here
514                 //if complete distanes, use N-2, else use S
515                 int down=B;
516                 if(sw==1){down=s[give_index(OTU1,OTU2,n)]-2;}
517                 if(down<=0)
518                   {error("distance information insufficient to construct a tree, leaves %i and %i isolated from tree",OTU1,OTU2);
519                   }
520                 //Rprintf("down=%i\n",down);
521                 sum*=(1.0/(2*(down)));
522                 //Rprintf("sum=%f\n",sum);
523                 double dxy=D[give_index(OTU1,OTU2,n)]/2;
524
525                 //Rprintf("R[%i,%i]:%f \n",OTU1,OTU2,sum);
526                 edge_length[k] = dxy+sum;//OTU1
527                 //Rprintf("l1:%f \n",edge_length[k]);
528                 edge_length[k + 1] = dxy-sum;//OTU2
529                 //Rprintf("l2:%f \n",edge_length[k+1]);
530                //no need to change distance matrix update for complete distance
531                //case, as pairs will automatically fall in the right cathegory
532                 A = D[give_index(OTU1,OTU2,n)];
533                 ij = 0;
534                 for (i = 1; i <= n; i++) {
535                         if (i == OTU1 || i == OTU2) continue;
536                         if(D[give_index(OTU1,i,n)]!=-1 && D[give_index(OTU2,i,n)]!=-1)
537                          {
538                             new_dist[ij]=0.5*(D[give_index(OTU1,i,n)]-edge_length[k]+D[give_index(OTU2,i,n)]-edge_length[k+1]);
539                          }else{
540                          if(D[give_index(OTU1,i,n)]!=-1)
541                                 {
542                                  new_dist[ij]=D[give_index(OTU1,i,n)]-edge_length[k];
543                                 }else{
544                                       if(D[give_index(OTU2,i,n)]!=-1)
545                                         {
546                                             new_dist[ij]=D[give_index(OTU2,i,n)]-edge_length[k+1];
547                                         }else{new_dist[ij]=-1;}
548                                      }
549                               }
550
551                         ij++;
552                 }
553
554                 for (i = 1; i < n; i++) {
555                         if (i == OTU1 || i == OTU2) continue;
556                         for (j = i + 1; j <= n; j++) {
557                                 if (j == OTU1 || j == OTU2) continue;
558                                 new_dist[ij] = D[DINDEX(i, j)];
559                                 ij++;
560                         }
561                 }
562
563                 /*for(i=1;i<n-1;i++)
564                 {
565                   for(j=i+1;j<=n-1;j++)
566                    {Rprintf("%f ",new_dist[give_index(i,j,n-1)]);
567                    }
568                   Rprintf("\n");
569                 }*/
570                 //compute Rui, only if distance matrix is still incomplete
571                 ij=0;
572                 if(sw==1)
573                 for(i=2;i<n;i++)
574                   {
575                    ij++;
576                    if(new_dist[give_index(i,1,n-1)]==-1)continue;
577
578                    for(j=1;j<n;j++)
579                      { if(j==1 || j==i)
580                        {
581                          if(i!=j)newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
582                          if(j!=1)newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
583                          newS[give_index(1,i,n-1)]++;
584                          continue;
585                        }
586                        if(new_dist[give_index(i,j,n-1)]!=-1 && new_dist[give_index(1,j,n-1)]!=-1)
587                         {
588                           newS[give_index(1,i,n-1)]++;
589                           newR[give_index(1,i,n-1)]+=new_dist[give_index(i,j,n-1)];
590                           newR[give_index(1,i,n-1)]+=new_dist[give_index(1,j,n-1)];
591                         }
592                      }
593                   }
594                 //fill in the rest of R and S, again only if distance matrix still
595                 //incomplete
596                 if(sw==1) /* added 2012-04-02 */
597                 for(i=1;i<n;i++)
598                 {if(i==OTU1 || i==OTU2)continue;
599                  for(j=i+1;j<=n;j++)
600                   {if(j==OTU1 || j==OTU2)continue;
601                    newR[ij]=R[give_index(i,j,n)];
602                    newS[ij]=s[give_index(i,j,n)];
603                    ij++;
604                   }
605                 }
606                 //update newR and newS with the new taxa, again only if distance
607                 //matrix is still incomplete
608                 if(sw==1)
609                 for(i=2;i<n-1;i++)
610                 {if(new_dist[give_index(1,i,n-1)]==-1)continue;
611                  for(j=i+1;j<=n-1;j++)
612                   {if(new_dist[give_index(1,j,n-1)]==-1)continue;
613                     if(new_dist[give_index(i,j,n-1)]==-1)continue;
614                    newR[give_index(i,j,n-1)]+=(new_dist[give_index(1,i,n-1)]+new_dist[give_index(1,j,n-1)]);
615                    newS[give_index(i,j,n-1)]++;
616                   }
617                 }
618
619                 /* update before the next loop
620                    (we are sure that OTU1 < OTU2) */
621                 if (OTU1 != 1)
622                         for (i = OTU1; i > 1; i--)
623                                 otu_label[i] = otu_label[i - 1];
624                 if (OTU2 != n)
625                         for (i = OTU2; i < n; i++)
626                                 otu_label[i] = otu_label[i + 1];
627                 otu_label[1] = cur_nod;
628
629                 n--;
630                 for (i = 0; i < n*(n - 1)/2; i++)
631                   {
632                     D[i] = new_dist[i];
633                     if(sw==1)
634                        {
635                         R[i] = newR[i];
636                         s[i] = newS[i];
637                        }
638                   }
639                 cur_nod--;
640                 k = k + 2;
641         }
642         int dK=0;//number of known distances in final distance matrix
643         int iUK=-1;//index of unkown distance, if we have one missing distance
644         int iK=-1;//index of only known distance, only needed if dK==1
645         for (i = 0; i < 3; i++) {
646                 edge1[*N*2 - 4 - i] = cur_nod;
647                 edge2[*N*2 - 4 - i] = otu_label[i + 1];
648                 if(D[i]!=-1){dK++;iK=i;}else{iUK=i;}
649         }
650         if(dK==2)
651          {//if two distances are known: assume our leaves are x,y,z, d(x,z) unknown
652           //and edge weights of three edges are a,b,c, then any b,c>0 that
653           //satisfy c-b=d(y,z)-d(x,y) a+c=d(y,z) are good edge weights, but for
654           //simplicity we assume a=c if d(yz)<d(xy) a=b otherwise, and after some
655           //algebra we get that we can set the missing distance equal to the
656           //maximum of the already present distances
657             double max=-1e50;
658           for(i=0;i<3;i++)
659             {if(i==iUK)continue;
660              if(D[i]>max)max=D[i];
661             }
662           D[iUK]=max;
663          }
664         if(dK==1)
665          {//through similar motivation as above, if we have just one known distance
666           //we set the other two distances equal to it
667           for(i=0;i<3;i++)
668             {if(i==iK)continue;
669              D[i]=D[iK];
670             }
671          }
672         if(dK==0)
673          {//no distances are known, we just set them to 1
674           for(i=0;i<3;i++)
675            {D[i]=1;
676            }
677          }
678         edge_length[*N*2 - 4] = (D[0] + D[1] - D[2])/2;
679         edge_length[*N*2 - 5] = (D[0] + D[2] - D[1])/2;
680         edge_length[*N*2 - 6] = (D[2] + D[1] - D[0])/2;
681 }