6 *****************************************************************************
8 * Copyright (c) 2004, Luke Sheneman
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * + Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * + Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
21 * + The names of its contributors may not be used to endorse or promote
22 * products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
37 *****************************************************************************
42 * sheneman@cs.uidaho.edu
47 #ifndef _INC_CLEARCUT_H_
48 #define _INC_CLEARCUT_H_ 1
54 #define NJ_VERSION "1.0.9"
57 #define NJ_INTERNAL_NODE -1
60 #define NJ_INPUT_MODE_UNKNOWN 0
61 #define NJ_INPUT_MODE_DISTANCE 100
62 #define NJ_INPUT_MODE_UNALIGNED_SEQUENCES 101
63 #define NJ_INPUT_MODE_ALIGNED_SEQUENCES 102
65 #define NJ_MODEL_NONE 100
66 #define NJ_MODEL_JUKES 101
67 #define NJ_MODEL_KIMURA 102
73 * DMAT - Distance Matrix
75 * This is arguably the most important structure in the
76 * program. This is the distance matrix, and it is used
77 * by many functions throughout the application.
79 * The matrix is architected as a contiguously allocated
80 * upper-diagonal matrix of floats which include the
86 * 0 0.0 1.0 0.3 0.2 0.1 0.3
87 * 1 0.0 0.3 0.2 0.1 0.8
93 * The distance matrix shrinks with every join operation,
94 * so I track the original and working size of the matrix
97 * One fast optimization to shrink the distance matrix
98 * involves incrementing the "val" pointer. Thus, in
99 * addition to tracking the pointer to the distances,
100 * I also track the original pointer to that I can
101 * free the memory associated with the working distance
104 * This also applies to the r and r2 vectors which are
105 * used to compute the transformed distances in the
110 typedef struct _STRUCT_DMAT {
112 long int ntaxa; /* the original size of the distance matrix */
113 long int size; /* the current/effective size of the distance matrix */
115 char **taxaname; /* a pointer to an array of taxa name strings */
117 float *val; /* the distances */
118 float *valhandle; /* to track the orig. pointer to free memory */
120 float *r, *r2; /* r and r2 vectors (used to compute transformed dists) */
121 float *rhandle, *r2handle; /* track orig. pointers to free memory */
128 * NJ_TREE - The Tree Data Structure
131 * The tree is represented internally as a rooted
132 * binary tree. Each internal node has a left and a right child.
134 * Additionally, I track the distance between the current node
135 * and that node's parent (i.e. the branch length).
137 * Finally, I track the index of the taxa for leaf nodes.
140 typedef struct _STRUCT_NJ_TREE {
142 struct _STRUCT_NJ_TREE *left; /* left child */
143 struct _STRUCT_NJ_TREE *right; /* right child */
145 float dist; /* branch length. i.e. dist from node to parent */
147 long int taxa_index; /* for terminal nodes, track the taxon index */
156 * This structure is used for building trees. It is a vector
157 * which, represents the center of the star when building the RNJ/NJ
158 * tree through star-decomposition.
160 * It contains a vector of tree (node) pointers. These pointers
161 * get joined together by a new internal node, and the new internal
162 * node is placed back into the vector of nodes (which is now smaller).
164 * To keep this vector in sync. with the shrinking matrix, parts of
165 * the vector are shuffled around, and so a pointer to the originally
166 * allocated vector is stored such that it can be freed from memory
169 * The original and working sizes of the vector are also tracked.
172 typedef struct _STRUCT_NJ_VERTEX {
175 NJ_TREE **nodes_handle; /* original memory handle for freeing */
177 long int nactive; /* number of active nodes in the list */
178 long int size; /* the total size of the vertex */
187 /* some function prototypes */
189 /* core function for performing Relaxed Neighbor Joining */
191 NJ_relaxed_nj(NJ_ARGS *nj_args, DMAT *dmat);
193 /* function for performing traditional Neighbor-Joining */
195 NJ_neighbor_joining(NJ_ARGS *nj_args, DMAT *dmat);
197 /* print the distance matrix (for debugging) */
199 NJ_print_distance_matrix(DMAT *dmat);
201 /* output the computed tree to stdout or to the specified file */
203 NJ_output_tree(NJ_ARGS *nj_args,
208 /* the recursive function for outputting trees */
210 NJ_output_tree2(FILE *fp,
216 /* initialize vertex */
218 NJ_init_vertex(DMAT *dmat);
220 /* used to decompose the star topology and build the tree */
222 NJ_decompose(DMAT *dmat,
228 /* print the vertex vector (for debugging) */
230 NJ_print_vertex(NJ_VERTEX *vertex);
232 /* print taxa names (for debugging) */
234 NJ_print_taxanames(DMAT *dmat);
236 /* initialize r-vector prior to RNJ/NJ */
238 NJ_init_r(DMAT *dmat);
240 /* print the r-vector (for debugging) */
242 NJ_print_r(DMAT *dmat);
244 /* shuffle the distance matrix, usually after reading in input */
246 NJ_shuffle_distance_matrix(DMAT *dmat);
248 /* free memory from the tree */
250 NJ_free_tree(NJ_TREE *node);
252 /* print permutations (for debugging) */
254 NJ_print_permutation(long int *perm,
257 /* duplicate a distance matrix for multiple iterations */
259 NJ_dup_dmat(DMAT *src);
261 /* free the distance matrix */
263 NJ_free_dmat(DMAT *dmat);
265 /* free the vertex vector */
267 NJ_free_vertex(NJ_VERTEX *vertex);
269 /* for computing the global minimum transformed distance in traditional NJ */
271 NJ_min_transform(DMAT *dmat,
275 #endif /* _INC_CLEARCUT_H_ */