9 *****************************************************************************
11 * Copyright (c) 2004, Luke Sheneman
12 * All rights reserved.
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
18 * + Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * + Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in
22 * the documentation and/or other materials provided with the
24 * + The names of its contributors may not be used to endorse or promote
25 * products derived from this software without specific prior
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
40 *****************************************************************************
45 * sheneman@cs.uidaho.edu
50 #ifndef _INC_CLEARCUT_H_
51 #define _INC_CLEARCUT_H_ 1
58 #define NJ_VERSION "1.0.9"
61 #define NJ_INTERNAL_NODE -1
64 #define NJ_INPUT_MODE_UNKNOWN 0
65 #define NJ_INPUT_MODE_DISTANCE 100
66 #define NJ_INPUT_MODE_UNALIGNED_SEQUENCES 101
67 #define NJ_INPUT_MODE_ALIGNED_SEQUENCES 102
69 #define NJ_MODEL_NONE 100
70 #define NJ_MODEL_JUKES 101
71 #define NJ_MODEL_KIMURA 102
77 * DMAT - Distance Matrix
79 * This is arguably the most important structure in the
80 * program. This is the distance matrix, and it is used
81 * by many functions throughout the application.
83 * The matrix is architected as a contiguously allocated
84 * upper-diagonal matrix of floats which include the
90 * 0 0.0 1.0 0.3 0.2 0.1 0.3
91 * 1 0.0 0.3 0.2 0.1 0.8
97 * The distance matrix shrinks with every join operation,
98 * so I track the original and working size of the matrix
101 * One fast optimization to shrink the distance matrix
102 * involves incrementing the "val" pointer. Thus, in
103 * addition to tracking the pointer to the distances,
104 * I also track the original pointer to that I can
105 * free the memory associated with the working distance
108 * This also applies to the r and r2 vectors which are
109 * used to compute the transformed distances in the
114 typedef struct _STRUCT_DMAT {
116 long int ntaxa; /* the original size of the distance matrix */
117 long int size; /* the current/effective size of the distance matrix */
119 char **taxaname; /* a pointer to an array of taxa name strings */
121 float *val; /* the distances */
122 float *valhandle; /* to track the orig. pointer to free memory */
124 float *r, *r2; /* r and r2 vectors (used to compute transformed dists) */
125 float *rhandle, *r2handle; /* track orig. pointers to free memory */
132 * NJ_TREE - The Tree Data Structure
135 * The tree is represented internally as a rooted
136 * binary tree. Each internal node has a left and a right child.
138 * Additionally, I track the distance between the current node
139 * and that node's parent (i.e. the branch length).
141 * Finally, I track the index of the taxa for leaf nodes.
144 typedef struct _STRUCT_NJ_TREE {
146 struct _STRUCT_NJ_TREE *left; /* left child */
147 struct _STRUCT_NJ_TREE *right; /* right child */
149 float dist; /* branch length. i.e. dist from node to parent */
151 long int taxa_index; /* for terminal nodes, track the taxon index */
160 * This structure is used for building trees. It is a vector
161 * which, represents the center of the star when building the RNJ/NJ
162 * tree through star-decomposition.
164 * It contains a vector of tree (node) pointers. These pointers
165 * get joined together by a new internal node, and the new internal
166 * node is placed back into the vector of nodes (which is now smaller).
168 * To keep this vector in sync. with the shrinking matrix, parts of
169 * the vector are shuffled around, and so a pointer to the originally
170 * allocated vector is stored such that it can be freed from memory
173 * The original and working sizes of the vector are also tracked.
176 typedef struct _STRUCT_NJ_VERTEX {
179 NJ_TREE **nodes_handle; /* original memory handle for freeing */
181 long int nactive; /* number of active nodes in the list */
182 long int size; /* the total size of the vertex */
187 /* some function prototypes */
188 int clearcut_main(int, char**);
190 /* core function for performing Relaxed Neighbor Joining */
192 NJ_relaxed_nj(NJ_ARGS *nj_args, DMAT *dmat);
194 /* function for performing traditional Neighbor-Joining */
196 NJ_neighbor_joining(NJ_ARGS *nj_args, DMAT *dmat);
198 /* print the distance matrix (for debugging) */
200 NJ_print_distance_matrix(DMAT *dmat);
202 /* output the computed tree to stdout or to the specified file */
204 NJ_output_tree(NJ_ARGS *nj_args,
209 /* the recursive function for outputting trees */
211 NJ_output_tree2(FILE *fp,
217 /* initialize vertex */
219 NJ_init_vertex(DMAT *dmat);
221 /* used to decompose the star topology and build the tree */
223 NJ_decompose(DMAT *dmat,
229 /* print the vertex vector (for debugging) */
231 NJ_print_vertex(NJ_VERTEX *vertex);
233 /* print taxa names (for debugging) */
235 NJ_print_taxanames(DMAT *dmat);
237 /* initialize r-vector prior to RNJ/NJ */
239 NJ_init_r(DMAT *dmat);
241 /* print the r-vector (for debugging) */
243 NJ_print_r(DMAT *dmat);
245 /* shuffle the distance matrix, usually after reading in input */
247 NJ_shuffle_distance_matrix(DMAT *dmat);
249 /* free memory from the tree */
251 NJ_free_tree(NJ_TREE *node);
253 /* print permutations (for debugging) */
255 NJ_print_permutation(long int *perm,
258 /* duplicate a distance matrix for multiple iterations */
260 NJ_dup_dmat(DMAT *src);
262 /* free the distance matrix */
264 NJ_free_dmat(DMAT *dmat);
266 /* free the vertex vector */
268 NJ_free_vertex(NJ_VERTEX *vertex);
270 /* for computing the global minimum transformed distance in traditional NJ */
272 NJ_min_transform(DMAT *dmat,
278 #endif /* _INC_CLEARCUT_H_ */