X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=src%2FBIONJ.c;h=56f2e51a0953a9b9679377ad79bfd4d52de9a7d5;hb=2014b83971be4b9cd1644d6127837df798e9335c;hp=d615c9136b139de800c8a11128c86634155b2d66;hpb=c827059eeafc8cbe41c812b26979543ab287803e;p=ape.git diff --git a/src/BIONJ.c b/src/BIONJ.c index d615c91..56f2e51 100644 --- a/src/BIONJ.c +++ b/src/BIONJ.c @@ -1,816 +1,341 @@ -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; BIONJ program ; -; ; -; Olivier Gascuel ; -; ; -; GERAD - Montreal- Canada ; -; olivierg@crt.umontreal.ca ; -; ; -; LIRMM - Montpellier- France ; -; gascuel@lirmm.fr ; -; ; -; UNIX version, written in C ; -; by Hoa Sien Cuong (Univ. Montreal) ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -//#include "BIONJ.h" +/* BIONJ.c 2012-04-30 */ + +/* Copyright 2007-2008 Olivier Gascuel, Hoa Sien Cuong, + R port by Vincent Lefort and Emmanuel Paradis */ + +/* This file is part of the R-package `ape'. */ +/* See the file ../COPYING for licensing issues. */ + +/* BIONJ program + + Olivier Gascuel + + GERAD - Montreal- Canada + olivierg@crt.umontreal.ca + + LIRMM - Montpellier- France + gascuel@lirmm.fr + + UNIX version, written in C + by Hoa Sien Cuong (Univ. Montreal) */ + #include "me.h" -void Initialize(float **delta, double *X, char **labels, int n, POINTERS *trees); -void Print_outputChar(int i, POINTERS *trees, char *output); -void bionj (double *X, int *N, char **labels, char **treeStr); -int Symmetrize(float **delta, int n); -void Concatenate(char chain1[MAX_LABEL_LENGTH], int ind, POINTERS *trees, int post); +void Initialize(float **delta, double *X, int n); +void bionj(double *X, int *N, int *edge1, int *edge2, double *el); float Distance(int i, int j, float **delta); float Variance(int i, int j, float **delta); int Emptied(int i, float **delta); float Sum_S(int i, float **delta); void Compute_sums_Sx(float **delta, int n); void Best_pair(float **delta, int r, int *a, int *b, int n); -float Finish_branch_length(int i, int j, int k, float **delta); -void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree); float Agglomerative_criterion(int i, int j, float **delta, int r); float Branch_length(int a, int b, float **delta, int r); float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta); float Reduction10(int a, int b, int i, float lamda, float vab, float **delta); float Lamda(int a, int b, float vab, float **delta, int n, int r); -/* void printMat(float **delta, int n); */ - -/*;;;;;;;;;;; INPUT, OUTPUT, INITIALIZATION ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; ; -; The delta matrix is read from the input-file. ; -; It is recommended to put it and the executable in ; -; a special directory. The input-file and output-file ; -; can be given as arguments to the executable by ; -; typing them after the executable (Bionj input-file ; -; output-file) or by typing them when asked by the ; -; program. The input-file has to be formated according ; -; the PHYLIP standard. The output file is formated ; -; according to the NEWWICK standard. ; -; ; -; The lower-half of the delta matrix is occupied by ; -; dissimilarities. The upper-half of the matrix is ; -; occupied by variances. The first column ; -; is initialized as 0; during the algorithm some ; -; indices are no more used, and the corresponding ; -; positions in the first column are set to 1. ; -; ; -; This delta matix is made symmetrical using the rule: ; -; Dij = Dji <- (Dij + Dji)/2. The diagonal is set to 0; ; -; during the further steps of the algorithm, it is used ; -; to store the sums Sx. ; -; ; -; A second array, trees, is used to store taxon names. ; -; During the further steps of the algoritm, some ; -; positions in this array are emptied while the others ; -; are used to store subtrees. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - -/*;;;;;;;;;;;;;;;;;;;;;;;;;; Initialize ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function reads an input file and return the ; -; delta matrix and trees: the list of taxa. ; -; ; -; input : ; -; float **delta : delta matrix ; -; FILE *input : pointer to input file ; -; int n : number of taxa ; -; char **trees : list of taxa ; -; ; -; return value: ; -; float **delta : delta matrix ; -; char *trees : list of taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -//void Initialize(float **delta, FILE *input, int n, POINTERS *trees) -void Initialize(float **delta, double *X, char **labels, int n, POINTERS *trees) -{ - int lig; /* matrix line */ - int col; /* matrix column */ -// float distance; - //char name_taxon[LEN]; /* taxon name */ - char name_taxon[MAX_LABEL_LENGTH]; - char format[MAX_DIGITS]; - WORD *name; - - snprintf (format, MAX_DIGITS, "%%%ds", MAX_LABEL_LENGTH); - - for(lig=1; lig <= n; lig++) - { - //fscanf(input,"%s",name_taxon); /* read taxon name */ - //fscanf (input, format, name_taxon); /* read taxon name */ - strncpy (name_taxon, labels[lig-1], MAX_LABEL_LENGTH); - name=(WORD *)calloc(1,sizeof(WORD)); /* taxon name is */ - if(name == NULL) /* put in trees */ - { - Rprintf("Out of memories !!"); - exit(0); - } - else - { - strncpy (name->name, name_taxon, MAX_LABEL_LENGTH); - name->suiv=NULL; - trees[lig].head=name; - trees[lig].tail=name; - for(col=lig; col <= n; col++) - { - //fscanf(input,"%f",&distance); /* read the distance */ -// &distance = X[XINDEX(lig,col)]; - delta[col][lig]=X[XINDEX(lig,col)]; - delta[lig][col]=X[XINDEX(lig,col)]; - if (lig==col) - delta[lig][col]=0; - } - } - } - return; -} +/* INPUT, OUTPUT, INITIALIZATION -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Print_output;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; ; -; Description : This function prints out the tree in the output file. ; -; ; -; input : ; -; POINTERS *trees : pointer to the subtrees. ; -; int i : indicate the subtree i to be printed. ; -: FILE *output : pointer to the output file. ; -; ; -; return value: The phylogenetic tree in the output file. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -void Print_outputChar(int i, POINTERS *trees, char *output) -{ - WORD *parcour; - parcour=trees[i].head; - while (parcour != NULL && (strlen (output) + strlen (parcour->name) < MAX_INPUT_SIZE)) - { - output = strncat (output, parcour->name, strlen (parcour->name)); - parcour=parcour->suiv; - } - return; -} + The lower-half of the delta matrix is occupied by + dissimilarities. The upper-half of the matrix is + occupied by variances. The first column + is initialized as 0; during the algorithm some + indices are no more used, and the corresponding + positions in the first column are set to 1. */ + +/* -- Initialize -- + + This function reads an input data and returns the delta matrix -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -; ; -; Main program ; -; ; -; argc is the number of arguments ; -; **argv contains the arguments: ; -; the first argument has to be BIONJ; ; -; the second is the inptu-file; ; -; the third is the output-file. ; -; When the input and output files are ; -; not given, the user is asked for them. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - -//tree *bionj (FILE *input, boolean isNJ) -void bionj (double *X, int *N, char **labels, char **treeStr) + input: float **delta : delta matrix + double *X : distances sent from R as a lower triangle matrix + int n : number of taxa + + output: float **delta : delta matrix initialized */ + +void Initialize(float **delta, double *X, int n) { - POINTERS *trees; /* list of subtrees */ - tree *T; /* the returned tree */ - char *chain1; /* stringized branch-length */ -// char *chain2; /* idem */ - char *str; /* the string containing final tree */ - int *a, *b; /* pair to be agglomerated */ - float **delta; /* delta matrix */ - float la; /* first taxon branch-length */ - float lb; /* second taxon branch-length */ - float vab; /* variance of Dab */ - float lamda = 0.5; - int i; -// int ok; - int r; /* number of subtrees */ - int n; /* number of taxa */ - int x, y; -// double t; - a=(int*)calloc(1,sizeof(int)); - b=(int*)calloc(1,sizeof(int)); - chain1=(char *)calloc(MAX_LABEL_LENGTH,sizeof(char)); - str = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(str)) - strncpy(str, "", strlen(str)); - /* end */ - -// fscanf(input,"%d",&n); - n = *N; - /* Create the delta matrix */ - delta=(float **)calloc(n+1,sizeof(float*)); - for(i=1; i<= n; i++) - { - delta[i]=(float *)calloc(n+1, sizeof(float)); - if(delta[i] == NULL) - { - Rprintf("Out of memories!!"); - exit(0); - } - } - trees=(POINTERS *)calloc(n+1,sizeof(POINTERS)); - if(trees == NULL) - { - Rprintf("Out of memories!!"); - exit(0); - } - /* initialise and symmetrize the running delta matrix */ - r=n; - *a=0; - *b=0; - Initialize(delta, X, labels, n, trees); -// ok=Symmetrize(delta, n); - -// if(!ok) -// Rprintf("\n The matrix is not symmetric.\n "); - while (r > 3) /* until r=3 */ - { - Compute_sums_Sx(delta, n); /* compute the sum Sx */ - Best_pair(delta, r, a, b, n); /* find the best pair by */ - vab=Variance(*a, *b, delta); /* minimizing (1) */ - la=Branch_length(*a, *b, delta, r); /* compute branch-lengths */ - lb=Branch_length(*b, *a, delta, r); /* using formula (2) */ -// if (!isNJ) - lamda=Lamda(*a, *b, vab, delta, n, r); /* compute lambda* using (9)*/ - for(i=1; i <= n; i++) - { - if(!Emptied(i,delta) && (i != *a) && (i != *b)) - { - if(*a > i) - { - x=*a; - y=i; - } - else - { - x=i; - y=*a; /* apply reduction formulae */ - } /* 4 and 10 to delta */ - delta[x][y]=Reduction4(*a, la, *b, lb, i, lamda, delta); - delta[y][x]=Reduction10(*a, *b, i, lamda, vab, delta); - } - } - strncpy(chain1,"",1); /* agglomerate the subtrees */ - strncat(chain1,"(",1); /* a and b together with the*/ - Concatenate(chain1, *a, trees, 0); /* branch-lengths according */ - strncpy(chain1,"",1); /* to the NEWWICK format */ - strncat(chain1,":",1); - snprintf(chain1+strlen(chain1),MAX_LABEL_LENGTH,"%f",la); - - strncat(chain1,",",1); - Concatenate(chain1,*a, trees, 1); - trees[*a].tail->suiv=trees[*b].head; - trees[*a].tail=trees[*b].tail; - strncpy(chain1,"",1); - strncat(chain1,":",1); - snprintf(chain1+strlen(chain1),MAX_LABEL_LENGTH,"%f",lb); - - strncat(chain1,")",1); - Concatenate(chain1, *a, trees, 1); - delta[*b][0]=1.0; /* make the b line empty */ - trees[*b].head=NULL; - trees[*b].tail=NULL; - r=r-1; - } - - FinishStr (delta, n, trees, str); /* compute the branch-lengths*/ - /* of the last three subtrees*/ - /* and print the tree in the */ - /* output-file */ - T = readNewickString (str, n); - T = detrifurcate(T); -// compareSets(T,species); - partitionSizes(T); - *treeStr = (char *)R_alloc(MAX_INPUT_SIZE, sizeof(char)); - /* added by EP */ - if (strlen(*treeStr)) - strncpy(*treeStr, "", strlen(*treeStr)); - /* end */ - NewickPrintTreeStr (T, *treeStr); - - for(i=1; i<=n; i++) - { - delta[i][0]=0.0; - trees[i].head=NULL; - trees[i].tail=NULL; - } - free(delta); - free(trees); - /* free (str); */ - free (chain1); - free (a); - free (b); - freeTree(T); - T = NULL; -// return (ret); - return; + int i, j; /* matrix line and column indices */ + int k = 0; /* index along X */ + + for (i = 1; i < n; i++) + for (j = i + 1; j <= n; j++) + delta[i][j] = delta[j][i] = X[k++]; + + for (i = 1; i <= n; i++) + delta[i][i] = delta[i][0] = 0; } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Utilities ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - - - -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Symmetrize ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function verifies if the delta matrix is symmetric; ; -; if not the matrix is made symmetric. ; -; ; -; input : ; -; float **delta : delta matrix ; -; int n : number of taxa ; -; ; -; return value: ; -; int symmetric : indicate if the matrix has been made ; -; symmetric or not ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -int Symmetrize(float **delta, int n) +void bionj(double *X, int *N, int *edge1, int *edge2, double *el) { - int lig; /* matrix line */ - int col; /* matrix column */ - float value; /* symmetrized value */ - int symmetric; - - symmetric=1; - for(lig=1; lig <= n; lig++) - { - for(col=1; col < lig; col++) - { - if(delta[lig][col] != delta[col][lig]) - { - value= (delta[lig][col]+delta[col][lig])/2; - delta[lig][col]=value; - delta[col][lig]=value; - symmetric=0; - } - } - } - return(symmetric); -} + int *a, *b; /* pair to be agglomerated */ + float **delta; /* delta matrix */ + float la; /* first taxon branch-length */ + float lb; /* second taxon branch-length */ + float vab; /* variance of Dab */ + float lamda = 0.5; + + int r; /* number of subtrees */ + int n; /* number of taxa */ + int i, x, y, curnod, k; + + int *ilab; /* indices of the tips (used as "labels") */ + + a = (int*)R_alloc(1, sizeof(int)); + b = (int*)R_alloc(1, sizeof(int)); + + n = *N; + + /* Create the delta matrix */ + delta = (float **)R_alloc(n + 1, sizeof(float*)); + for (i = 1; i <= n; i++) + delta[i] = (float *)R_alloc(n + 1, sizeof(float)); + + /* initialise */ + r = n; + *a = *b = 0; + Initialize(delta, X, n); + + ilab = (int *)R_alloc(n + 1, sizeof(int)); + for (i = 1; i <= n; i++) ilab[i] = i; + + curnod = 2 * n - 2; + k = 0; + + while (r > 3) { + + Compute_sums_Sx(delta, n); /* compute the sum Sx */ + Best_pair(delta, r, a, b, n); /* find the best pair by */ + vab = Variance(*a, *b, delta); /* minimizing (1) */ + la = Branch_length(*a, *b, delta, r); /* compute branch-lengths */ + lb = Branch_length(*b, *a, delta, r); /* using formula (2) */ + + lamda = Lamda(*a, *b, vab, delta, n, r); /* compute lambda* using (9)*/ + + edge1[k] = edge1[k + 1] = curnod; + edge2[k] = ilab[*a]; + edge2[k + 1] = ilab[*b]; + el[k] = la; + el[k + 1] = lb; + k = k + 2; + + for (i = 1; i <= n; i++) { + if (Emptied(i,delta) || (i == *a) || (i == *b)) continue; + if(*a > i) { + x = *a; + y = i; + } else { + x = i; + y = *a; + } + + /* apply reduction formulae 4 and 10 to delta */ + delta[x][y] = Reduction4(*a, la, *b, lb, i, lamda, delta); + delta[y][x] = Reduction10(*a, *b, i, lamda, vab, delta); + } + delta[*b][0] = 1.0; /* make the b line empty */ + ilab[*a] = curnod; + curnod--; + r = r - 1; + } + /* finalise the three basal edges */ + int last[3]; + i = 1; + k = 0; + while (k < 3) { + if (!Emptied(i, delta)) last[k++] = i; + i++; + } + for (i = 0, k = 2 * n - 4; i < 3; i++, k--) { + edge1[k] = curnod; /* <- the root at this stage */ + edge2[k] = ilab[last[i]]; + } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;; Concatenate ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; ; -; Description : This function concatenates a string to another. ; -; ; -; input : ; -; char *chain1 : the string to be concatenated. ; -; int ind : indicate the subtree to which concatenate the ; -; string ; -; POINTERS *trees : pointer to subtrees. ; -; int post : position to which concatenate (front (0) or ; -; end (1)) ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + double D[3]; + D[0] = Distance(last[0], last[1], delta); + D[1] = Distance(last[0], last[2], delta); + D[2] = Distance(last[1], last[2], delta); -//void Concatenate(char chain1[LEN], int ind, POINTERS *trees, int post) -void Concatenate(char chain1[MAX_LABEL_LENGTH], int ind, POINTERS *trees, int post) -{ - WORD *bran; - - bran=(WORD *)calloc(1,sizeof(WORD)); - if(bran == NULL) - { - Rprintf("Out of memories"); - exit(0); - } - else - { - strncpy(bran->name,chain1,MAX_LABEL_LENGTH); - bran->suiv=NULL; - } - if(post == 0) - { - bran->suiv=trees[ind].head; - trees[ind].head=bran; - } - else - { - trees[ind].tail->suiv=bran; - trees[ind].tail=trees[ind].tail->suiv; - } + el[2 * n - 4] = (D[0] + D[1] - D[2])/2; + el[2 * n - 5] = (D[0] + D[2] - D[1])/2; + el[2 * n - 6] = (D[2] + D[1] - D[0])/2; } +/* -- Distance -- + + This function retrieves and returns the distance between taxa + i and j from the delta matrix. -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Distance;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieve ant return de distance between taxa ; -; i and j from the delta matrix. ; -; ; -; input : ; -; int i : taxon i ; -; int j : taxon j ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float distance : dissimilarity between the two taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + input: int i : taxon i + int j : taxon j + float **delta : the delta matrix + + output: float distance : dissimilarity between the two taxa */ float Distance(int i, int j, float **delta) { - if(i > j) - return(delta[i][j]); - else - return(delta[j][i]); + if (i > j) return(delta[i][j]); + else return(delta[j][i]); } +/* -- Variance -- + + This function retrieves and returns the variance of the + distance between i and j, from the delta matrix. -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Variance;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieve and return the variance of the ; -; distance between i and j, from the delta matrix. ; -; ; -; input : ; -; int i : taxon i ; -; int j : taxon j ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float distance : the variance of Dij ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + input: int i : taxon i + int j : taxon j + float **delta : the delta matrix + + output: float distance : the variance of Dij */ float Variance(int i, int j, float **delta) { - if(i > j) - return(delta[j][i]); - else - return(delta[i][j]); + if (i > j) return(delta[j][i]); + else return(delta[i][j]); } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Emptied ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function verifie if a line is emptied or not. ; -; ; -; input : ; -; int i : subtree (or line) i ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; 0 : if not emptied. ; -; 1 : if emptied. ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -int Emptied(int i, float **delta) /* test if the ith line is emptied */ -{ - return((int)delta[i][0]); -} +/* -- Emptied -- + + This function tests if a line is emptied or not. + input: int i : subtree (or line) i + float **delta : the delta matrix -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Sum_S;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function retrieves the sum Sx from the diagonal ; -; of the delta matrix. ; -; ; -; input : ; -; int i : subtree i ; -; float **delta : the delta matrix ; -; ; -; return value: ; -; float delta[i][i] : sum Si ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -float Sum_S(int i, float **delta) /* get sum Si form the diagonal */ + output: 0 : if not emptied + 1 : if emptied */ + +int Emptied(int i, float **delta) { - return(delta[i][i]); + return((int)delta[i][0]); } +/* -- Sum_S -- -/*;;;;;;;;;;;;;;;;;;;;;;;Compute_sums_Sx;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function computes the sums Sx and store them in the ; -; diagonal the delta matrix. ; -; ; -; input : ; -; float **delta : the delta matrix. ; -; int n : the number of taxa ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + This function retrieves the sum Sx from the diagonal + of the delta matrix -void Compute_sums_Sx(float **delta, int n) + input: int i : subtree i + float **delta : the delta matrix + + output: float delta[i][i] : sum Si */ + +float Sum_S(int i, float **delta) { - float sum; - int i; - int j; - - for(i= 1; i <= n ; i++) - { - if(!Emptied(i,delta)) - { - sum=0; - for(j=1; j <=n; j++) - { - if(i != j && !Emptied(j,delta)) /* compute the sum Si */ - sum=sum + Distance(i,j,delta); - } - } - delta[i][i]=sum; /* store the sum Si in */ - } /* delta’s diagonal */ + return(delta[i][i]); } +/* -- Compute_sums_Sx -- -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Best_pair;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function finds the best pair to be agglomerated by ; -; minimizing the agglomerative criterion (1). ; -; ; -; input : ; -; float **delta : the delta matrix ; -; int r : number of subtrees ; -; int *a : contain the first taxon of the pair ; -; int *b : contain the second taxon of the pair ; -; int n : number of taxa ; -; ; -; return value: ; -; int *a : the first taxon of the pair ; -; int *b : the second taxon of the pair ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ + This function computes the sums Sx and stores them in the + diagonal the delta matrix. -void Best_pair(float **delta, int r, int *a, int *b, int n) + input: float **delta : the delta matrix + int n : the number of taxa */ + + +void Compute_sums_Sx(float **delta, int n) { - float Qxy; /* value of the criterion calculated*/ - int x,y; /* the pair which is tested */ - float Qmin; /* current minimun of the criterion */ - - Qmin=1.0e300; - for(x=1; x <= n; x++) - { - if(!Emptied(x,delta)) - { - for(y=1; y < x; y++) - { - if(!Emptied(y,delta)) - { - Qxy=Agglomerative_criterion(x,y,delta,r); - if(Qxy < Qmin-0.000001) - { - Qmin=Qxy; - *a=x; - *b=y; - } + float sum; + int i, j; + + for (i = 1; i <= n ; i++) { + if (Emptied(i, delta)) continue; + sum = 0; + for (j = 1; j <= n; j++) { + if (i == j || Emptied(j, delta)) continue; + sum += Distance(i, j, delta); /* compute the sum Si */ } - } - } - } + delta[i][i] = sum; + } } -/*;;;;;;;;;;;;;;;;;;;;;;Finish_branch_length;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : Compute the length of the branch attached ; -; to the subtree i, during the final step ; -; ; -; input : ; -; int i : position of subtree i ; -; int j : position of subtree j ; -; int k : position of subtree k ; -; float **delta : ; -; ; -; return value: ; -; float length : The length of the branch ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - -float Finish_branch_length(int i, int j, int k, float **delta) -{ - float length; - length=0.5*(Distance(i,j,delta) + Distance(i,k,delta) - -Distance(j,k,delta)); - return(length); -} +/* -- Best_pair -- + This function finds the best pair to be agglomerated by + minimizing the agglomerative criterion (1). + + input: float **delta : the delta matrix + int r : number of subtrees + int *a : contain the first taxon of the pair + int *b : contain the second taxon of the pair + int n : number of taxa + + output: int *a : the first taxon of the pair + int *b : the second taxon of the pair */ -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Finish;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Description : This function compute the length of the lasts three ; -; subtrees and write the tree in the output file. ; -; ; -; input : ; -; float **delta : the delta matrix ; -; int n : the number of taxa ; -; WORD *trees : list of subtrees ; -; ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ -/* -void Finish(float **delta, int n, POINTERS *trees, FILE *output) -{ - int l=1; - int i=0; - float length; - WORD *bidon; - WORD *ele; - int last[3]; // the last three subtrees - - while(l <= n) - { // find the last tree subtree - if(!Emptied(l, delta)) - { - last[i]=l; - i++; - } - l++; - } - - length=Finish_branch_length(last[0],last[1],last[2],delta); - fprintf(output,"("); - Print_output(last[0],trees,output); - fprintf(output,":"); -// gcvt(length,PREC, str); -// fprintf(output,"%s,",str); - fprintf(output,"%f,",length); - - length=Finish_branch_length(last[1],last[0],last[2],delta); - Print_output(last[1],trees,output); - fprintf(output,":"); -// gcvt(length,PREC, str); -// fprintf(output,"%s,",str); - fprintf(output,"%f,",length); - - length=Finish_branch_length(last[2],last[1],last[0],delta); - Print_output(last[2],trees,output); - fprintf(output,":"); -// gcvt(length,PREC,str); -// fprintf(output,"%s",str); - fprintf(output,"%f",length); - fprintf(output,");"); - fprintf(output,"\n"); - - for(i=0; i < 3; i++) - { - bidon=trees[last[i]].head; - ele=bidon; - while(bidon!=NULL) - { - ele=ele->suiv; - free(bidon); - bidon=ele; - } - } -} -*/ -void FinishStr (float **delta, int n, POINTERS *trees, char *StrTree) +void Best_pair(float **delta, int r, int *a, int *b, int n) { - int l=1; - int i=0; - float length; - char *tmp; - WORD *bidon; - WORD *ele; - int last[3]; /* the last three subtrees */ - - while(l <= n) - { /* find the last tree subtree */ - if(!Emptied(l, delta)) - { - last[i]=l; - i++; - } - l++; - } - tmp = (char*) calloc (12, sizeof(char)); - StrTree[0]='('; - - length=Finish_branch_length(last[0],last[1],last[2],delta); - Print_outputChar (last[0], trees, StrTree); - snprintf (tmp, 12, "%f,", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - length=Finish_branch_length(last[1],last[0],last[2],delta); - Print_outputChar (last[1], trees, StrTree); - snprintf (tmp, 12, "%f,", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - length=Finish_branch_length(last[2],last[1],last[0],delta); - Print_outputChar (last[2], trees, StrTree); - snprintf (tmp, 12, "%f", length); - if (strlen (StrTree) + strlen (tmp) < MAX_INPUT_SIZE-1) { - strncat (StrTree, ":", 1); - strncat (StrTree, tmp, strlen (tmp)); - } - - if (strlen (StrTree) < MAX_INPUT_SIZE-3) - strncat (StrTree, ");\n", 3); - - free (tmp); - for(i=0; i < 3; i++) - { - bidon=trees[last[i]].head; - ele=bidon; - while(bidon!=NULL) - { - ele=ele->suiv; - free(bidon); - bidon=ele; + float Qxy; /* value of the criterion calculated */ + int x, y; /* the pair which is tested */ + float Qmin; /* current minimun of the criterion */ + + Qmin = 1.0e300; + for (x = 1; x <= n; x++) { + if (Emptied(x, delta)) continue; + for (y = 1; y < x; y++) { + if (Emptied(y, delta)) continue; + Qxy = Agglomerative_criterion(x, y, delta, r); + if (Qxy < Qmin - 0.000001) { + Qmin = Qxy; + *a = x; + *b = y; + } + } } - } - return; } -/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*\ -; ; -; Formulae ; -; ; -\*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/ - +/* Formulae */ +/* Formula (1) */ float Agglomerative_criterion(int i, int j, float **delta, int r) { - float Qij; - Qij=(r-2)*Distance(i,j,delta) /* Formula (1) */ - -Sum_S(i,delta) - -Sum_S(j,delta); - - return(Qij); + return((r - 2) * Distance(i, j, delta) - + Sum_S(i, delta) - Sum_S(j, delta)); } - +/* Formula (2) */ float Branch_length(int a, int b, float **delta, int r) { - float length; - length=0.5*(Distance(a,b,delta) /* Formula (2) */ - +(Sum_S(a,delta) - -Sum_S(b,delta))/(r-2)); - return(length); + return(0.5 * (Distance(a, b, delta) + + (Sum_S(a, delta) - Sum_S(b, delta))/(r - 2))); } - +/* Formula (4) */ float Reduction4(int a, float la, int b, float lb, int i, float lamda, float **delta) { - float Dui; - Dui=lamda*(Distance(a,i,delta)-la) - +(1-lamda)*(Distance(b,i,delta)-lb); /* Formula (4) */ - return(Dui); + return(lamda * (Distance(a, i, delta) - la) + + (1 - lamda) * (Distance(b, i, delta) - lb)); } - +/* Formula (10) */ float Reduction10(int a, int b, int i, float lamda, float vab, float **delta) { - float Vci; - Vci=lamda*Variance(a,i,delta)+(1-lamda)*Variance(b,i,delta) - -lamda*(1-lamda)*vab; /*Formula (10) */ - return(Vci); + return(lamda * Variance(a, i, delta) + (1 - lamda) * Variance(b, i, delta) + - lamda * (1 - lamda) * vab); } float Lamda(int a, int b, float vab, float **delta, int n, int r) { - float lamda=0.0; - int i; - - if(vab==0.0) - lamda=0.5; - else - { - for(i=1; i <= n ; i++) - { - if(a != i && b != i && !Emptied(i,delta)) - lamda=lamda + (Variance(b,i,delta) - Variance(a,i,delta)); + float lamda = 0.0; + int i; + + if (vab == 0.0) lamda = 0.5; else { + for (i = 1; i <= n ; i++) { + if (a == i || b == i || Emptied(i, delta)) continue; + lamda += (Variance(b, i, delta) - Variance(a, i, delta)); + } + lamda = 0.5 + lamda/(2 * (r - 2) * vab); /* Formula (9) */ } - lamda=0.5 + lamda/(2*(r-2)*vab); - } /* Formula (9) and the */ - if(lamda > 1.0) /* constraint that lamda*/ - lamda = 1.0; /* belongs to [0,1] */ - if(lamda < 0.0) - lamda=0.0; - return(lamda); + if (lamda > 1.0) lamda = 1.0; /* force 0 < lamda < 1 */ + if (lamda < 0.0) lamda = 0.0; + return(lamda); } - -/* void printMat(float **delta, int n) */ -/* { */ -/* int i, j; */ -/* for (i=1; i<=n; i++) { */ -/* for (j=1; j<=n; j++) */ -/* Rprintf ("%f ", delta[i][j]); */ -/* Rprintf("\n"); */ -/* } */ -/* Rprintf("\n"); */ -/* return; */ -/* } */