A7118EE611CFCAC000CFDE03 /* degapseqscommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = degapseqscommand.cpp; sourceTree = "<group>"; };
A7118F0F11CFD5DC00CFDE03 /* getrelabundcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getrelabundcommand.h; sourceTree = "<group>"; };
A7118F1011CFD5DC00CFDE03 /* getrelabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getrelabundcommand.cpp; sourceTree = "<group>"; };
+ A71BA75F1225694E00C57506 /* clearcut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clearcut.cpp; sourceTree = "<group>"; };
A71D924211AEB42400D00CBC /* clustersplitcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = clustersplitcommand.cpp; sourceTree = "<group>"; };
A71D924311AEB42400D00CBC /* clustersplitcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clustersplitcommand.h; sourceTree = "<group>"; };
A71D924411AEB42400D00CBC /* splitabundcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = splitabundcommand.cpp; sourceTree = "<group>"; };
A7DA217A113FECD400BF472F /* weighted.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = weighted.h; sourceTree = "<group>"; };
A7DA217B113FECD400BF472F /* whittaker.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = whittaker.cpp; sourceTree = "<group>"; };
A7DA217C113FECD400BF472F /* whittaker.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = whittaker.h; sourceTree = "<group>"; };
- A7DF0AD3121EBB14004A03EA /* clearcut.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = clearcut.c; sourceTree = "<group>"; };
A7DF0AD4121EBB14004A03EA /* clearcut.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = clearcut.h; sourceTree = "<group>"; };
- A7DF0AD5121EBB14004A03EA /* cmdargs.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = cmdargs.c; sourceTree = "<group>"; };
+ A7DF0AD5121EBB14004A03EA /* cmdargs.cpp */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.cpp; fileEncoding = 4; path = cmdargs.cpp; sourceTree = "<group>"; };
A7DF0AD6121EBB14004A03EA /* cmdargs.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = cmdargs.h; sourceTree = "<group>"; };
A7DF0AD7121EBB14004A03EA /* common.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = common.h; sourceTree = "<group>"; };
A7DF0AD8121EBB14004A03EA /* dayhoff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dayhoff.h; sourceTree = "<group>"; };
- A7DF0AD9121EBB14004A03EA /* distclearcut.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = distclearcut.c; sourceTree = "<group>"; };
+ A7DF0AD9121EBB14004A03EA /* distclearcut.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = distclearcut.cpp; sourceTree = "<group>"; };
A7DF0ADA121EBB14004A03EA /* distclearcut.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = distclearcut.h; sourceTree = "<group>"; };
- A7DF0ADB121EBB14004A03EA /* dmat.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = dmat.c; sourceTree = "<group>"; };
+ A7DF0ADB121EBB14004A03EA /* dmat.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = dmat.cpp; sourceTree = "<group>"; };
A7DF0ADC121EBB14004A03EA /* dmat.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = dmat.h; sourceTree = "<group>"; };
- A7DF0ADD121EBB14004A03EA /* fasta.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = fasta.c; sourceTree = "<group>"; };
+ A7DF0ADD121EBB14004A03EA /* fasta.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = fasta.cpp; sourceTree = "<group>"; };
A7DF0ADE121EBB14004A03EA /* fasta.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fasta.h; sourceTree = "<group>"; };
- A7DF0ADF121EBB14004A03EA /* getopt_long.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = getopt_long.c; sourceTree = "<group>"; };
+ A7DF0ADF121EBB14004A03EA /* getopt_long.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = getopt_long.cpp; sourceTree = "<group>"; };
A7DF0AE0121EBB14004A03EA /* getopt_long.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = getopt_long.h; sourceTree = "<group>"; };
- A7DF0AE1121EBB14004A03EA /* prng.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = prng.c; sourceTree = "<group>"; };
+ A7DF0AE1121EBB14004A03EA /* prng.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = prng.cpp; sourceTree = "<group>"; };
A7DF0AE2121EBB14004A03EA /* prng.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = prng.h; sourceTree = "<group>"; };
A7E8338B115BBDAA00739EC4 /* parsesffcommand.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parsesffcommand.cpp; sourceTree = "<group>"; };
A7E8338C115BBDAA00739EC4 /* parsesffcommand.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parsesffcommand.h; sourceTree = "<group>"; };
isa = PBXGroup;
children = (
A7DF0AD4121EBB14004A03EA /* clearcut.h */,
- A7DF0AD3121EBB14004A03EA /* clearcut.c */,
- A7DF0AD6121EBB14004A03EA /* cmdargs.h */,
+ A71BA75F1225694E00C57506 /* clearcut.cpp */,
A7DF0AD7121EBB14004A03EA /* common.h */,
A7DF0AD8121EBB14004A03EA /* dayhoff.h */,
- A7DF0AD5121EBB14004A03EA /* cmdargs.c */,
- A7DF0AD9121EBB14004A03EA /* distclearcut.c */,
+ A7DF0AD6121EBB14004A03EA /* cmdargs.h */,
+ A7DF0AD5121EBB14004A03EA /* cmdargs.cpp */,
+ A7DF0AD9121EBB14004A03EA /* distclearcut.cpp */,
A7DF0ADA121EBB14004A03EA /* distclearcut.h */,
- A7DF0ADB121EBB14004A03EA /* dmat.c */,
+ A7DF0ADB121EBB14004A03EA /* dmat.cpp */,
A7DF0ADC121EBB14004A03EA /* dmat.h */,
- A7DF0ADD121EBB14004A03EA /* fasta.c */,
+ A7DF0ADD121EBB14004A03EA /* fasta.cpp */,
A7DF0ADE121EBB14004A03EA /* fasta.h */,
- A7DF0ADF121EBB14004A03EA /* getopt_long.c */,
+ A7DF0ADF121EBB14004A03EA /* getopt_long.cpp */,
A7DF0AE0121EBB14004A03EA /* getopt_long.h */,
A7DF0AE2121EBB14004A03EA /* prng.h */,
- A7DF0AE1121EBB14004A03EA /* prng.c */,
+ A7DF0AE1121EBB14004A03EA /* prng.cpp */,
);
name = clearcutsource;
sourceTree = "<group>";
+++ /dev/null
-/*
- * clearcut.c
- *
- * $Id$
- *
- *****************************************************************************
- *
- * Copyright (c) 2004, Luke Sheneman
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * + Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * + Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * + The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************
- *
- * An implementation of the Relaxed Neighbor-Joining algorithm
- * of Evans, J., Sheneman, L., and Foster, J.
- *
- *
- * AUTHOR:
- *
- * Luke Sheneman
- * sheneman@cs.uidaho.edu
- *
- */
-
-
-
-#include <stdio.h>
-#include <string.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <math.h>
-#include <time.h>
-#include <sys/time.h>
-#include <float.h>
-
-#include "dist.h"
-#include "dmat.h"
-#include "fasta.h"
-#include "cmdargs.h"
-#include "common.h"
-#include "clearcut.h"
-#include "prng.h"
-
-
-
-/*
- * main() -
- *
- * The entry point to the program.
- *
- */
-int
-main(int argc,
- char *argv[]) {
-
- DMAT *dmat; /* The working distance matrix */
- DMAT *dmat_backup = NULL;/* A backup distance matrix */
- NJ_TREE *tree; /* The phylogenetic tree */
- NJ_ARGS *nj_args; /* Structure for holding command-line arguments */
- long int i;
-
- /* some variables for tracking time */
- struct timeval tv;
- unsigned long long startUs, endUs;
-
-
- /* check and parse supplied command-line arguments */
- nj_args = NJ_handle_args(argc, argv);
- if(!nj_args) {
- fprintf(stderr, "Clearcut: Error processing command-line arguments.\n");
- exit(-1);
- }
-
- /* for verbose reporting, print the random number seed to stdout */
- if(nj_args->verbose_flag) {
- printf("PRNG SEED: %d\n", nj_args->seed);
- }
-
- /* Initialize Mersenne Twister PRNG */
- init_genrand(nj_args->seed);
-
-
- switch(nj_args->input_mode) {
-
- /* If the input type is a distance matrix */
- case NJ_INPUT_MODE_DISTANCE:
-
- /* parse the distance matrix */
- dmat = NJ_parse_distance_matrix(nj_args);
- if(!dmat) {
- exit(-1);
- }
-
- break;
-
- /* If the input type is a multiple sequence alignment */
- case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
-
- /* build a distance matrix from a multiple sequence alignment */
- dmat = NJ_build_distance_matrix(nj_args);
- if(!dmat) {
- fprintf(stderr, "Clearcut: Failed to build distance matrix from alignment.\n");
- exit(-1);
- }
-
- break;
-
- default:
-
- fprintf(stderr, "Clearcut: Could not determine how to process input\n");
- exit(-1);
- }
-
- /*
- * Output the computed distance matrix,
- * if the user specified one.
- */
- if(nj_args->matrixout) {
- NJ_output_matrix(nj_args, dmat);
- }
-
- /*
- * If we are going to generate multiple trees from
- * the same distance matrix, we need to make a backup
- * of the original distance matrix.
- */
- if(nj_args->ntrees > 1) {
- dmat_backup = NJ_dup_dmat(dmat);
- }
-
- /* process n trees */
- for(i=0;i<nj_args->ntrees;i++) {
-
- /*
- * If the user has specified matrix shuffling, we need
- * to randomize the distance matrix
- */
- if(nj_args->shuffle) {
- NJ_shuffle_distance_matrix(dmat);
- }
-
- /* RECORD THE PRECISE TIME OF THE START OF THE NEIGHBOR-JOINING */
- gettimeofday(&tv, NULL);
- startUs = ((unsigned long long) tv.tv_sec * 1000000ULL)
- + ((unsigned long long) tv.tv_usec);
-
-
- /*
- * Invoke either the Relaxed Neighbor-Joining algorithm (default)
- * or the "traditional" Neighbor-Joining algorithm
- */
- if(nj_args->neighbor) {
- tree = NJ_neighbor_joining(nj_args, dmat);
- } else {
- tree = NJ_relaxed_nj(nj_args, dmat);
- }
-
- if(!tree) {
- fprintf(stderr, "Clearcut: Failed to construct tree.\n");
- exit(0);
- }
-
- /* RECORD THE PRECISE TIME OF THE END OF THE NEIGHBOR-JOINING */
- gettimeofday(&tv, NULL);
- endUs = ((unsigned long long) tv.tv_sec * 1000000ULL)
- + ((unsigned long long) tv.tv_usec);
-
- /* print the time taken to perform the neighbor join */
- if(nj_args->verbose_flag) {
- if(nj_args->neighbor) {
- fprintf(stderr, "NJ tree built in %llu.%06llu secs\n",
- (endUs - startUs) / 1000000ULL,
- (endUs - startUs) % 1000000ULL);
- } else {
- fprintf(stderr, "RNJ tree built in %llu.%06llu secs\n",
- (endUs - startUs) / 1000000ULL,
- (endUs - startUs) % 1000000ULL);
- }
- }
-
- /* Output the neighbor joining tree here */
- NJ_output_tree(nj_args, tree, dmat, i);
-
- NJ_free_tree(tree); /* Free the tree */
- NJ_free_dmat(dmat); /* Free the working distance matrix */
-
- /*
- * If we need to do another iteration, lets re-initialize
- * our working distance matrix.
- */
- if(nj_args->ntrees > 1 && i<(nj_args->ntrees-1) ) {
- dmat = NJ_dup_dmat(dmat_backup);
- }
- }
-
- /* Free the backup distance matrix */
- if(nj_args->ntrees > 1) {
- NJ_free_dmat(dmat_backup);
- }
-
- /* If verbosity, describe where the tree output is */
- if(nj_args->verbose_flag) {
- if(nj_args->neighbor) {
- printf("NJ tree(s) in %s\n", nj_args->outfilename);
- } else {
- printf("Relaxed NJ tree(s) in %s\n", nj_args->outfilename);
- }
- }
-
- exit(0);
-}
-
-
-
-
-
-/*
- * NJ_find_hmin() - Find minimum transformed values along horizontal
- *
- *
- * INPUTS:
- * -------
- * dmat -- The distance matrix
- * a -- The index of the specific taxon in the distance matrix
- *
- * RETURNS:
- * --------
- * <float> -- The value of the selected minimum
- * min -- Used to transport the index of the minima out
- * of the function (by reference)
- * hmincount -- Return the number of minima along the horizontal
- * (by reference)
- *
- *
- * DESCRIPTION:
- * ------------
- *
- * A fast, inline function to find the smallest transformed value
- * along the "horizontal" portion of an entry in a distance matrix.
- *
- * Distance matrices are stored internally as continguously-allocated
- * upper-diagonal structures. With the exception of the taxa at
- * row 0 of this upper-diagonal matrix, all taxa have both a horizontal
- * and vertical component in the distance matrix. This function
- * scans the horizonal portion of the entry in the distance matrix
- * for the specified taxon and finds the minimum transformed value
- * along that horizontal component.
- *
- * Since multiple minima can exist along the horizontal portion
- * of the entry, I consider all minima and break ties
- * stochastically to help avoid systematic bias.
- *
- * Just searching along the horizontal portion of a row is very fast
- * since the data is stored linearly and contiguously in memory and
- * cache locality is exploited in the distance matrix representation.
- *
- * Look at nj.h for more information on how the distance matrix
- * is architected.
- *
- */
-static inline
-float
-NJ_find_hmin(DMAT *dmat,
- long int a,
- long int *min,
- long int *hmincount) {
-
- long int i; /* index variable for looping */
- int size; /* current size of distance matrix */
- int mindex = 0; /* holds the current index to the chosen minimum */
- float curval; /* used to hold current transformed values */
- float hmin; /* the value of the transformed minimum */
-
- float *ptr, *r2, *val; /* pointers used to reduce dereferencing in inner loop */
-
- /* values used for stochastic selection among multiple minima */
- float p, x;
- long int smallcnt;
-
- /* initialize the min to something large */
- hmin = (float)HUGE_VAL;
-
- /* setup some pointers to limit dereferencing later */
- r2 = dmat->r2;
- val = dmat->val;
- size = dmat->size;
-
- /* initialize values associated with minima tie breaking */
- p = 1.0;
- smallcnt = 0;
-
-
- ptr = &(val[NJ_MAP(a, a+1, size)]); /* at the start of the horiz. part */
- for(i=a+1;i<size;i++) {
-
- curval = *(ptr++) - (r2[a] + r2[i]); /* compute transformed distance */
-
- if(NJ_FLT_EQ(curval, hmin)) { /* approx. equal */
-
- smallcnt++;
-
- p = 1.0/(float)smallcnt;
- x = genrand_real2();
-
- /* select this minimum in a way which is proportional to
- the number of minima found along the row so far */
- if( x < p ) {
- mindex = i;
- }
-
- } else if (curval < hmin) {
-
- smallcnt = 1;
- hmin = curval;
- mindex = i;
- }
- }
-
- /* save off the the minimum index to be returned via reference */
- *min = mindex;
-
- /* save off the number of minima */
- *hmincount = smallcnt;
-
- /* return the value of the smallest tranformed distance */
- return(hmin);
-}
-
-
-
-
-
-
-
-
-/*
- * NJ_find_vmin() - Find minimum transformed distance along vertical
- *
- *
- * INPUTS:
- * -------
- * dmat -- The distance matrix
- * a -- The index of the specific taxon in the distance matrix
- *
- *
- * RETURNS:
- * --------
- * <float> -- The value of the selected minimum
- * min -- Used to transport the index of the minima out
- * of the function (by reference)
- * vmincount -- The number of minima along the vertical
- * return by reference.
- *
- * DESCRIPTION:
- * ------------
- *
- * A fast, inline function to find the smallest transformed value
- * along the "vertical" portion of an entry in a distance matrix.
- *
- * Distance matrices are stored internally as continguously-allocated
- * upper-diagonal matrices. With the exception of the taxa at
- * row 0 of this upper-diagonal matrix, all taxa have both a horizontal
- * and vertical component in the distance matrix. This function
- * scans the vertical portion of the entry in the distance matrix
- * for the specified taxon and finds the minimum transformed value
- * along that vertical component.
- *
- * Since multiple minima can exist along the vertical portion
- * of the entry, I consider all minima and break ties
- * stochastically to help avoid systematic bias.
- *
- * Due to cache locality reasons, searching along the vertical
- * component is going to be considerably slower than searching
- * along the horizontal.
- *
- * Look at nj.h for more information on how the distance matrix
- * is architected.
- *
- */
-static inline
-float
-NJ_find_vmin(DMAT *dmat,
- long int a,
- long int *min,
- long int *vmincount) {
-
- long int i; /* index variable used for looping */
- long int size; /* track the size of the matrix */
- long int mindex = 0;/* track the index to the minimum */
- float curval; /* track value of current transformed distance */
- float vmin; /* the index to the smallest "vertical" minimum */
-
- /* pointers which are used to reduce pointer dereferencing in inner loop */
- float *ptr, *r2, *val;
-
- /* values used in stochastically breaking ties */
- float p, x;
- long int smallcnt;
-
- /* initialize the vertical min to something really big */
- vmin = (float)HUGE_VAL;
-
- /* save off some values to limit dereferencing later */
- r2 = dmat->r2;
- val = dmat->val;
- size = dmat->size;
-
- p = 1.0;
- smallcnt = 0;
-
- /* start on the first row and work down */
- ptr = &(val[NJ_MAP(0, a, size)]);
- for(i=0;i<a;i++) {
-
- curval = *ptr - (r2[i] + r2[a]); /* compute transformed distance */
-
- if(NJ_FLT_EQ(curval, vmin)) { /* approx. equal */
-
- smallcnt++;
-
- p = 1.0/(float)smallcnt;
- x = genrand_real2();
-
- /* break ties stochastically to avoid systematic bias */
- if( x < p ) {
- mindex = i;
- }
-
- } else if (curval < vmin) {
-
- smallcnt = 1;
- vmin = curval;
- mindex = i;
- }
-
- /* increment our working pointer to the next row down */
- ptr += size-i-1;
- }
-
- /* pass back the index to the minimum found so far (by reference) */
- *min = mindex;
-
- /* pass back the number of minima along the vertical */
- *vmincount = smallcnt;
-
- /* return the value of the smallest transformed distance */
- return(vmin);
-}
-
-
-
-
-/*
- * NJ_permute() - Generate random permutation using the provably unbiased
- * Fisher-Yates Shuffle.
- *
- * INPUTS:
- * -------
- * perm -- A pointer to the array of long ints which will be filled.
- * size -- the length of the permutation vector
- *
- *
- * OUTPUTS:
- * -------
- * NONE
- *
- *
- * DESCRIPTION:
- * ------------
- *
- * Return a permuted list of numbers from 0 through size.
- * This is accomplished by initializing the permutation
- * as an ordered list of integers and then iterating
- * through and swapping two values selected according to the
- * Fisher-Yates method.
- *
- * This unbiased method for random permutation generation is
- * discussed in:
- *
- * Donald E. Knuth, The Art of Computer Programming,
- * Addison-Wesley, Volumes 1, 2, and 3, 3rd edition, 1998
- *
- */
-static inline
-void
-NJ_permute(long int *perm,
- long int size) {
-
- long int i; /* index used for looping */
- long int swap; /* we swap values to generate permutation */
- long int tmp; /* used for swapping values */
-
-
- /* check to see if vector of long ints is valid */
- if(!perm) {
- fprintf(stderr, "Clearcut: NULL permutation pointer in NJ_permute()\n");
- exit(-1);
- }
-
- /* init permutation as an ordered list of integers */
- for(i=0;i<size;i++) {
- perm[i] = i;
- }
-
- /*
- * Iterate across the array from i = 0 to size -1, swapping ith element
- * with a randomly chosen element from a changing range of possible values
- */
- for(i=0;i<size;i++) {
-
- /* choose which element we will swap with */
- swap = i + NJ_genrand_int31_top(size-i);
-
- /* swap elements here */
- if(i != swap) {
- tmp = perm[swap];
- perm[swap] = perm[i];
- perm[i] = tmp;
- }
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_compute_r() - Compute post-join changes to r-vector. In this
- * case, we decrement all of the accumulated distances
- * in the r-vector for the two nodes that were
- * recently joined (i.e. x, y)
- *
- * INPUTS:
- * -------
- * dmat -- The distance matrix
- * a -- The index of one of the taxa that were joined
- * b -- The index of the other taxa that was joined
- *
- * RETURNS:
- * --------
- * NONE
- *
- * DESCRIPTION:
- * ------------
- *
- * This vector of floats is used as a summary of overall distances from
- * each entry in the distance matrix to every other entry. These values
- * are then used when computing the transformed distances from which
- * decisions concerning joining are made.
- *
- * For speed, we don't recompute r from scratch. Instead, we decrement
- * all entries in r by the appropriate amount. That is, r[i] -= dist(i, a)
- * and r[i] -= dist(i, b).
- *
- * As a speed optimization, I process the rows altogether for cache locality
- * purposes, and then process columns.
- *
- * The processing of the scaled r matrix (r2) is handled on-the-fly elsewhere.
- *
- */
-static inline
-void
-NJ_compute_r(DMAT *dmat,
- long int a,
- long int b) {
-
- long int i; /* a variable used in indexing */
- float *ptrx, *ptry; /* pointers into the distance matrix */
-
- /* some variables to limit pointer dereferencing in loop */
- long int size;
- float *r, *val;
-
- /* to limit pointer dereferencing */
- size = dmat->size;
- val = dmat->val;
- r = dmat->r+a+1;
-
- /*
- * Loop through the rows and decrement the stored r values
- * by the distances stored in the rows and columns of the distance
- * matrix which are being removed post-join.
- *
- * We do the rows altogether in order to benefit from cache locality.
- */
- ptrx = &(val[NJ_MAP(a, a+1, size)]);
- ptry = &(val[NJ_MAP(b, b+1, size)]);
-
- for(i=a+1;i<size;i++) {
- *r -= *(ptrx++);
-
- if(i>b) {
- *r -= *(ptry++);
- }
-
- r++;
- }
-
- /* Similar to the above loop, we now do the columns */
- ptrx = &(val[NJ_MAP(0, a, size)]);
- ptry = &(val[NJ_MAP(0, b, size)]);
- r = dmat->r;
- for(i=0;i<b;i++) {
- if(i<a) {
- *r -= *ptrx;
- ptrx += size-i-1;
- }
-
- *r -= *ptry;
- ptry += size-i-1;
- r++;
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_check_additivity() - Check to make sure that addivity preserved by join
- *
- *
- * INPUTS:
- * -------
- * dmat -- distance matrix
- * a -- index into dmat for one of the rows to be joined
- * b -- index into dmat for another row to be joined
- *
- * OUTPUTS:
- * --------
- * int 1 if join adheres to additivity constraint
- * 0 if join does breaks additivity
- *
- * DESCRIPTION:
- * ------------
- *
- * Here we perform the check to make sure that by joining a and b we do not
- * also break consistency (i.e. preserves additivity) with the distances between
- * the taxa in the new clade and other nodes in the tree. This is done quite
- * efficiently by looking up the untransformed distance between node b and
- * some other "target" taxa in the distance matrix (which is not a nor b) and
- * comparing that distance to the distance computed by finding the distance
- * from node a to the proposed internal node "x" which joins (a,b).
- *
- * If dist(x,b) + dist (b, target) == dist(b, target) then additivity is
- * preserved, otherwise, additivity is not preserved. If we are in
- * additivity mode, this join should be rejected.
- *
- */
-static inline
-int
-NJ_check_additivity(DMAT *dmat,
- long int a,
- long int b) {
-
- float a2clade, b2clade;
- float clade_dist;
- long int target;
-
-
- /* determine target taxon here */
- if(b == dmat->size-1) {
- /* if we can't do a row here, lets do a column */
- if(a==0) {
- if(b==1) {
- target = 2;
- } else {
- target = 1;
- }
- } else {
- target = 0;
- }
- } else {
- target = b+1;
- }
-
-
- /* distance between a and the root of clade (a,b) */
- a2clade =
- ( (dmat->val[NJ_MAP(a, b, dmat->size)]) +
- (dmat->r2[a] - dmat->r2[b]) ) / 2.0;
-
- /* distance between b and the root of clade (a,b) */
- b2clade =
- ( (dmat->val[NJ_MAP(a, b, dmat->size)]) +
- (dmat->r2[b] - dmat->r2[a]) ) / 2.0;
-
- /* distance between the clade (a,b) and the target taxon */
- if(b<target) {
-
- /* compute the distance from the clade root to the target */
- clade_dist =
- ( (dmat->val[NJ_MAP(a, target, dmat->size)] - a2clade) +
- (dmat->val[NJ_MAP(b, target, dmat->size)] - b2clade) ) / 2.0;
-
- /*
- * Check to see that distance from clade root to target + distance from
- * b to clade root are equal to the distance from b to the target
- */
- if(NJ_FLT_EQ(dmat->val[NJ_MAP(b, target, dmat->size)],
- (clade_dist + b2clade))) {
- return(1); /* join is legitimate */
- } else {
- return(0); /* join is illigitimate */
- }
-
- } else {
-
- /* compute the distance from the clade root to the target */
- clade_dist =
- ( (dmat->val[NJ_MAP(target, a, dmat->size)] - a2clade) +
- (dmat->val[NJ_MAP(target, b, dmat->size)] - b2clade) ) / 2.0;
-
- /*
- * Check to see that distance from clade root to target + distance from
- * b to clade root are equal to the distance from b to the target
- */
- if(NJ_FLT_EQ(dmat->val[NJ_MAP(target, b, dmat->size)],
- (clade_dist + b2clade))) {
- return(1); /* join is legitimate */
- } else {
- return(0); /* join is illegitimate */
- }
- }
-}
-
-
-
-
-
-
-
-/*
- * NJ_check() - Check to see if two taxa can be joined
-
- *
- * INPUTS:
- * -------
- * nj_args -- Pointer to the data structure holding command-line args
- * dmat -- distance matrix
- * a -- index into dmat for one of the rows to be joined
- * b -- index into dmat for another row to be joined
- * min -- the minimum value found
- * additivity -- a flag (0 = not additive mode, 1 = additive mode)
- *
- * OUTPUTS:
- * --------
- * int 1 if join is okay
- * 0 if join is not okay
- *
- * DESCRIPTION:
- * ------------
- *
- * This function ultimately takes two rows and makes sure that the
- * intersection of those two rows, which has a transformed distance of
- * "min", is actually the smallest (or equal to the smallest)
- * transformed distance for both rows (a, b). If so, it returns
- * 1, else it returns 0.
- *
- * Basically, we want to join two rows only if the minimum
- * transformed distance on either row is at the intersection of
- * those two rows.
- *
- */
-static inline
-int
-NJ_check(NJ_ARGS *nj_args,
- DMAT *dmat,
- long int a,
- long int b,
- float min,
- int additivity) {
-
-
- long int i, size;
- float *ptr, *val, *r2;
-
-
- /* some aliases for speed and readability reasons */
- val = dmat->val;
- r2 = dmat->r2;
- size = dmat->size;
-
-
- /* now determine if joining a, b will result in broken distances */
- if(additivity) {
- if(!NJ_check_additivity(dmat, a, b)) {
- return(0);
- }
- }
-
- /* scan the horizontal of row b, punt if anything < min */
- ptr = &(val[NJ_MAP(b, b+1, size)]);
- for(i=b+1;i<size;i++) {
- if( NJ_FLT_LT( (*ptr - (r2[b] + r2[i])), min) ) {
- return(0);
- }
- ptr++;
- }
-
- /* scan the vertical component of row a, punt if anything < min */
- if(nj_args->norandom) { /* if we are doing random joins, we checked this */
- ptr = val + a;
- for(i=0;i<a;i++) {
- if( NJ_FLT_LT( (*ptr - (r2[i] + r2[a])), min) ) {
- return(0);
- }
- ptr += size-i-1;
- }
- }
-
- /* scan the vertical component of row b, punt if anything < min */
- ptr = val + b;
- for(i=0;i<b;i++) {
- if( NJ_FLT_LT( (*ptr - (r2[i] + r2[b])), min) && i!=a) {
- return(0);
- }
- ptr += size-i-1;
- }
-
- return(1);
-}
-
-
-
-
-
-
-
-/*
- * NJ_collapse() - Collapse the distance matrix by removing
- * rows a and b from the distance matrix and
- * replacing them with a single new row which
- * represents the internal node joining a and b
- *
- *
- * INPUTS:
- * -------
- * dmat -- A pointer to the distance matrix
- * vertex -- A pointer to the vertex vector (vector of tree nodes)
- * which is used in constructing the tree
- * a -- An index to a row in the distance matrix from which we
- * joined. This row will be collapsed.
- * b -- An index to a row in the distance matrix from which we
- * joined. This row will be collapsed.
- *
- * RETURNS:
- * --------
- * NONE
- *
- *
- * DESCRIPTION:
- * ------------
- *
- * This function collapses the distance matrix in a way which optimizes
- * cache locality and ultimately gives us a speed improvement due to
- * cache. At this point, we've decided to join rows a and b from
- * the distance matrix. We will remove rows a and b from the distance
- * matrix and replace them with a new row which represents the internal
- * node which joins rows a and b together.
- *
- * We always keep the matrix as compact as possible in order to
- * get good performance from our cache in subsequent operations. Cache
- * is the key to good performance here.
- *
- * Key Steps:
- * ----------
- *
- * 1) Fill the "a" row with the new distances of the internal node
- * joining a and b to all other rows.
- * 2) Copy row 0 into what was row b
- * 3) Increment the pointer to the start of the distance matrix
- * by one row.
- * 4) Decrement the size of the matrix by one row.
- * 5) Do roughly the same thing to the r vector in order to
- * keep it in sync with the distance matrix.
- * 6) Compute the scaled r vector (r2) based on the updated
- * r vector
- *
- * This keeps the distance matrix as compact as possible in memory, and
- * is a relatively fast operation.
- *
- * This function requires that a < b
- *
- */
-static inline
-void
-NJ_collapse(DMAT *dmat,
- NJ_VERTEX *vertex,
- long int a,
- long int b) {
-
-
- long int i; /* index used for looping */
- long int size; /* size of dmat --> reduce pointer dereferencing */
- float a2clade; /* distance from a to the new node that joins a and b */
- float b2clade; /* distance from b to the new node that joins a and b */
- float cval; /* stores distance information during loop */
- float *vptr; /* pointer to elements in first row of dist matrix */
- float *ptra; /* pointer to elements in row a of distance matrix */
- float *ptrb; /* pointer to elements in row b of distance matrix */
-
- float *val, *r, *r2; /* simply used to limit pointer dereferencing */
-
-
- /* We must assume that a < b */
- if(a >= b) {
- fprintf(stderr, "Clearcut: (a<b) constraint check failed in NJ_collapse()\n");
- exit(0);
- }
-
- /* some shortcuts to help limit dereferencing */
- val = dmat->val;
- r = dmat->r;
- r2 = dmat->r2;
- size = dmat->size;
-
- /* compute the distance from the clade components (a, b) to the new node */
- a2clade =
- ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[a] - dmat->r2[b]) ) / 2.0;
- b2clade =
- ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[b] - dmat->r2[a]) ) / 2.0;
-
-
- r[a] = 0.0; /* we are removing row a, so clear dist. in r */
-
- /*
- * Fill the horizontal part of the "a" row and finish computing r and r2
- * we handle the horizontal component first to maximize cache locality
- */
- ptra = &(val[NJ_MAP(a, a+1, size)]); /* start ptra at the horiz. of a */
- ptrb = &(val[NJ_MAP(a+1, b, size)]); /* start ptrb at comparable place */
- for(i=a+1;i<size;i++) {
-
- /*
- * Compute distance from new internal node to others in
- * the distance matrix.
- */
- cval =
- ( (*ptra - a2clade) +
- (*ptrb - b2clade) ) / 2.0;
-
- /* incr. row b pointer differently depending on where i is in loop */
- if(i<b) {
- ptrb += size-i-1; /* traverse vertically by incrementing by row */
- } else {
- ptrb++; /* traverse horiz. by incrementing by column */
- }
-
- /* assign the newly computed distance and increment a ptr by a column */
- *(ptra++) = cval;
-
- /* accumulate the distance onto the r vector */
- r[a] += cval;
- r[i] += cval;
-
- /* scale r2 on the fly here */
- r2[i] = r[i]/(float)(size-3);
- }
-
- /* fill the vertical part of the "a" column and finish computing r and r2 */
- ptra = val + a; /* start at the top of the columb for "a" */
- ptrb = val + b; /* start at the top of the columb for "b" */
- for(i=0;i<a;i++) {
-
- /*
- * Compute distance from new internal node to others in
- * the distance matrix.
- */
- cval =
- ( (*ptra - a2clade) +
- (*ptrb - b2clade) ) / 2.0;
-
- /* assign the newly computed distance and increment a ptr by a column */
- *ptra = cval;
-
- /* accumulate the distance onto the r vector */
- r[a] += cval;
- r[i] += cval;
-
- /* scale r2 on the fly here */
- r2[i] = r[i]/(float)(size-3);
-
- /* here, always increment by an entire row */
- ptra += size-i-1;
- ptrb += size-i-1;
- }
-
-
- /* scale r2 on the fly here */
- r2[a] = r[a]/(float)(size-3);
-
-
-
- /*
- * Copy row 0 into row b. Again, the code is structured into two
- * loops to maximize cache locality for writes along the horizontal
- * component of row b.
- */
- vptr = val;
- ptrb = val + b;
- for(i=0;i<b;i++) {
- *ptrb = *(vptr++);
- ptrb += size-i-1;
- }
- vptr++; /* skip over the diagonal */
- ptrb = &(val[NJ_MAP(b, b+1, size)]);
- for(i=b+1;i<size;i++) {
- *(ptrb++) = *(vptr++);
- }
-
- /*
- * Collapse r here by copying contents of r[0] into r[b] and
- * incrementing pointer to the beginning of r by one row
- */
- r[b] = r[0];
- dmat->r = r+1;
-
-
- /*
- * Collapse r2 here by copying contents of r2[0] into r2[b] and
- * incrementing pointer to the beginning of r2 by one row
- */
- r2[b] = r2[0];
- dmat->r2 = r2+1;
-
- /* increment dmat pointer to next row */
- dmat->val += size;
-
- /* decrement the total size of the distance matrix by one row */
- dmat->size--;
-
- return;
-}
-
-
-
-
-
-
-
-
-
-/*
- * NJ_neighbor_joining() - Perform a traditional Neighbor-Joining
- *
- *
- * INPUTS:
- * -------
- * nj_args -- A pointer to a structure containing the command-line arguments
- * dmat -- A pointer to the distance matrix
- *
- * RETURNS:
- * --------
- * NJ_TREE * -- A pointer to the Neighbor-Joining tree.
- *
- * DESCRIPTION:
- * ------------
- *
- * This function performs a traditional Neighbor-Joining operation in which
- * the distance matrix is exhaustively searched for the global minimum
- * transformed distance. The two nodes which intersect at the global
- * minimum transformed distance are then joined and the distance
- * matrix is collapsed. This process continues until there are only
- * two nodes left, at which point those nodes are joined.
- *
- */
-NJ_TREE *
-NJ_neighbor_joining(NJ_ARGS *nj_args,
- DMAT *dmat) {
-
-
- NJ_TREE *tree = NULL;
- NJ_VERTEX *vertex = NULL;
-
- long int a, b;
- float min;
-
-
- /* initialize the r and r2 vectors */
- NJ_init_r(dmat);
-
- /* allocate and initialize our vertex vector used for tree construction */
- vertex = NJ_init_vertex(dmat);
- if(!vertex) {
- fprintf(stderr, "Clearcut: Could not initialize vertex in NJ_neighbor_joining()\n");
- return(NULL);
- }
-
- /* we iterate until the working distance matrix has only 2 entries */
- while(vertex->nactive > 2) {
-
- /*
- * Find the global minimum transformed distance from the distance matrix
- */
- min = NJ_min_transform(dmat, &a, &b);
-
- /*
- * Build the tree by removing nodes a and b from the vertex array
- * and inserting a new internal node which joins a and b. Collapse
- * the vertex array similarly to how the distance matrix and r and r2
- * are compacted.
- */
- NJ_decompose(dmat, vertex, a, b, 0);
-
- /* decrement the r and r2 vectors by the distances corresponding to a, b */
- NJ_compute_r(dmat, a, b);
-
- /* compact the distance matrix and the r and r2 vectors */
- NJ_collapse(dmat, vertex, a, b);
- }
-
- /* Properly join the last two nodes on the vertex list */
- tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST);
-
- /* return the computed tree to the calling function */
- return(tree);
-}
-
-
-
-
-
-
-
-
-/*
- * NJ_relaxed_nj() - Construct a tree using the Relaxed Neighbor-Joining
- *
- * INPUTS:
- * -------
- * nj_args -- A pointer to a data structure containing the command-line args
- * dmat -- A pointer to the distance matrix
- *
- * RETURNS:
- * --------
- *
- * NJ_TREE * -- A pointer to a Relaxed Neighbor-Joining tree
- *
- * DESCRIPTION:
- * ------------
- *
- * This function implements the Relaxed Neighbor-Joining algorithm of
- * Evans, J., Sheneman, L., and Foster, J.
- *
- * Relaxed Neighbor-Joining works by choosing a local minimum transformed
- * distance when determining when to join two nodes. (Traditional
- * Neighbor-Joining chooses a global minimum transformed distance).
- *
- * The algorithm shares the property with traditional NJ that if the
- * input distances are additive (self-consistent), then the algorithm
- * will manage to construct the true tree consistent with the additive
- * distances. Additivity state is tracked and every proposed join is checked
- * to make sure it maintains additivity constraints. If no
- * additivity-preserving join is possible in a single pass, then the distance
- * matrix is non-additive, and additivity checking is abandoned.
- *
- * The algorithm will either attempt joins randomly, or it will perform joins
- * in a particular order. The default behavior is to perform joins randomly,
- * but this can be switched off with a command-line switch.
- *
- * For randomized joins, all attempts are made to alleviate systematic bias
- * for the choice of rows to joins. All tie breaking is done in a way which
- * is virtually free of bias.
- *
- * To perform randomized joins, a random permutation is constructed which
- * specifies the order in which to attempt joins. I iterate through the
- * random permutation, and for each row in the random permutation, I find
- * the minimum transformed distance for that row. If there are multiple
- * minima, I break ties evenly. For the row which intersects our
- * randomly chosen row at the chosen minimum, if we are are still in
- * additivity mode, I check to see if joining the two rows will break
- * our additivity constraints. If not, I check to see if there exists
- * a transformed distance which is smaller than the minimum found on the
- * original row. If there is, then we proceed through the random permutation
- * trying additional rows in the random order specified in the permutation.
- * If there is no smaller minimum transformed distance on either of the
- * two rows, then we join them, collapse the distance matrix, and compute
- * a new random permutation.
- *
- * If the entire random permutation is traversed and no joins are possible
- * due to additivity constraints, then the distance matrix is not
- * additive, and additivity constraint-checking is disabled.
- *
- */
-NJ_TREE *
-NJ_relaxed_nj(NJ_ARGS *nj_args,
- DMAT *dmat) {
-
-
- NJ_TREE *tree;
- NJ_VERTEX *vertex;
- long int a, b, t, bh, bv, i;
- float hmin, vmin, hvmin;
- float p, q, x;
- int join_flag;
- int additivity_mode;
- long int hmincount, vmincount;
- long int *permutation = NULL;
-
-
-
- /* initialize the r and r2 vectors */
- NJ_init_r(dmat);
-
- additivity_mode = 1;
-
- /* allocate the permutation vector, if we are in randomize mode */
- if(!nj_args->norandom) {
- permutation = (long int *)calloc(dmat->size, sizeof(long int));
- if(!permutation) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_relaxed_nj()\n");
- return(NULL);
- }
- }
-
- /* allocate and initialize our vertex vector used for tree construction */
- vertex = NJ_init_vertex(dmat);
-
- /* loop until there are only 2 nodes left to join */
- while(vertex->nactive > 2) {
-
- switch(nj_args->norandom) {
-
- /* RANDOMIZED JOINS */
- case 0:
-
- join_flag = 0;
-
- NJ_permute(permutation, dmat->size-1);
- for(i=0;i<dmat->size-1 && (vertex->nactive>2) ;i++) {
-
- a = permutation[i];
-
- /* find min trans dist along horiz. of row a */
- hmin = NJ_find_hmin(dmat, a, &bh, &hmincount);
- if(a) {
- /* find min trans dist along vert. of row a */
- vmin = NJ_find_vmin(dmat, a, &bv, &vmincount);
- } else {
- vmin = hmin;
- bv = bh;
- vmincount = 0;
- }
-
- if(NJ_FLT_EQ(hmin, vmin)) {
-
- /*
- * The minima along the vertical and horizontal are
- * the same. Compute the proportion of minima along
- * the horizonal (p) and the proportion of minima
- * along the vertical (q).
- *
- * If the same minima exist along the horizonal and
- * vertical, we break the tie in a way which is
- * non-biased. That is, we break the tie based on the
- * proportion of horiz. minima versus vertical minima.
- *
- */
- p = (float)hmincount / ((float)hmincount + (float)vmincount);
- q = 1.0 - p;
- x = genrand_real2();
-
- if(x < p) {
- hvmin = hmin;
- b = bh;
- } else {
- hvmin = vmin;
- b = bv;
- }
- } else if(NJ_FLT_LT(hmin, vmin) ) {
- hvmin = hmin;
- b = bh;
- } else {
- hvmin = vmin;
- b = bv;
- }
-
- if(NJ_check(nj_args, dmat, a, b, hvmin, additivity_mode)) {
-
- /* swap a and b, if necessary, to make sure a < b */
- if(b < a) {
- t = a;
- a = b;
- b = t;
- }
-
- join_flag = 1;
-
- /* join taxa from rows a and b */
- NJ_decompose(dmat, vertex, a, b, 0);
-
- /* collapse matrix */
- NJ_compute_r(dmat, a, b);
- NJ_collapse(dmat, vertex, a, b);
-
- NJ_permute(permutation, dmat->size-1);
- }
- }
-
- /* turn off additivity if go through an entire cycle without joining */
- if(!join_flag) {
- additivity_mode = 0;
- }
-
- break;
-
-
-
- /* DETERMINISTIC JOINS */
- case 1:
-
- join_flag = 0;
-
- for(a=0;a<dmat->size-1 && (vertex->nactive > 2) ;) {
-
- /* find the min along the horizontal of row a */
- hmin = NJ_find_hmin(dmat, a, &b, &hmincount);
-
- if(NJ_check(nj_args, dmat, a, b, hmin, additivity_mode)) {
-
- join_flag = 1;
-
- /* join taxa from rows a and b */
- NJ_decompose(dmat, vertex, a, b, 0);
-
- /* collapse matrix */
- NJ_compute_r(dmat, a, b);
- NJ_collapse(dmat, vertex, a, b);
-
- if(a) {
- a--;
- }
-
- } else {
- a++;
- }
- }
-
- /* turn off additivity if go through an entire cycle without joining */
- if(!join_flag) {
- additivity_mode = 0;
- }
-
- break;
- }
-
- } /* WHILE */
-
- /* Join the last two nodes on the vertex list */
- tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST);
-
- if(nj_args->verbose_flag) {
- if(additivity_mode) {
- printf("Tree is additive\n");
- } else {
- printf("Tree is not additive\n");
- }
- }
-
- if(vertex) {
- NJ_free_vertex(vertex);
- }
-
- if(!nj_args->norandom && permutation) {
- free(permutation);
- }
-
- return(tree);
-}
-
-
-
-
-
-
-/*
- * NJ_print_distance_matrix() -
- *
- * Print a distance matrix
- *
- */
-void
-NJ_print_distance_matrix(DMAT *dmat) {
-
- long int i, j;
-
- printf("ntaxa: %ld\n", dmat->ntaxa);
- printf(" size: %ld\n", dmat->size);
-
- for(i=0;i<dmat->size;i++) {
-
- for(j=0;j<dmat->size;j++) {
- if(j>i) {
- printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)]);
- } else {
- printf(" -");
- }
- }
-
-
- if(dmat->r && dmat->r2) {
- printf("\t\t%0.4f", dmat->r[i]);
- printf("\t%0.4f", dmat->r2[i]);
-
-
-
- printf("\n");
-
- for(j=0;j<dmat->size;j++) {
- if(j>i) {
- printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)] - (dmat->r2[i] + dmat->r2[j]));
- } else {
- printf(" ");
- }
- }
-
- printf("\n\n");
- }
- }
-
- printf("\n");
-
- return;
-}
-
-
-
-
-
-
-
-/*
- * NJ_output_tree() -
- *
- * A wrapper for the function that really prints the tree,
- * basically to get a newline in there conveniently. :-)
- *
- * Print n trees, as specified in command-args
- * using "count" variable from 0 to (n-1)
- *
- */
-void
-NJ_output_tree(NJ_ARGS *nj_args,
- NJ_TREE *tree,
- DMAT *dmat,
- long int count) {
-
- FILE *fp;
-
- if(nj_args->stdout_flag) {
- fp = stdout;
- } else {
-
- if(count == 0) {
- fp = fopen(nj_args->outfilename, "w"); /* open for writing */
- } else {
- fp = fopen(nj_args->outfilename, "a"); /* open for appending */
- }
-
- if(!fp) {
- fprintf(stderr, "Clearcut: Failed to open outfile %s\n", nj_args->outfilename);
- exit(-1);
- }
- }
-
- NJ_output_tree2(fp, nj_args, tree, tree, dmat);
- fprintf(fp, ";\n");
-
- if(!nj_args->stdout_flag) {
- fclose(fp);
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_output_tree2() -
- *
- *
- */
-void
-NJ_output_tree2(FILE *fp,
- NJ_ARGS *nj_args,
- NJ_TREE *tree,
- NJ_TREE *root,
- DMAT *dmat) {
-
- if(!tree) {
- return;
- }
-
- if(tree->taxa_index != NJ_INTERNAL_NODE) {
-
- if(nj_args->expblen) {
- fprintf(fp, "%s:%e",
- dmat->taxaname[tree->taxa_index],
- tree->dist);
- } else {
- fprintf(fp, "%s:%f",
- dmat->taxaname[tree->taxa_index],
- tree->dist);
- }
-
- } else {
-
-
- if(tree->left && tree->right) {
- fprintf(fp, "(");
- }
- if(tree->left) {
- NJ_output_tree2(fp, nj_args, tree->left, root, dmat);
- }
-
- if(tree->left && tree->right) {
- fprintf(fp, ",");
- }
- if(tree->right) {
- NJ_output_tree2(fp, nj_args, tree->right, root, dmat);
- }
-
- if(tree != root->left) {
- if(tree->left && tree->right) {
- if(tree != root) {
- if(nj_args->expblen) {
- fprintf(fp, "):%e", tree->dist);
- } else {
- fprintf(fp, "):%f", tree->dist);
- }
- } else {
- fprintf(fp, ")");
- }
- }
- } else {
- fprintf(fp, ")");
- }
- }
-
- return;
-}
-
-
-
-
-
-
-
-/*
- * NJ_init_r()
- *
- * This function computes the r column in our matrix
- *
- */
-void
-NJ_init_r(DMAT *dmat) {
-
- long int i, j, size;
- long int index;
- float *r, *r2, *val;
- long int size1;
- float size2;
-
- r = dmat->r;
- r2 = dmat->r2;
- val = dmat->val;
- size = dmat->size;
- size1 = size-1;
- size2 = (float)(size-2);
-
- index = 0;
- for(i=0;i<size1;i++) {
- index++;
- for(j=i+1;j<size;j++) {
- r[i] += val[index];
- r[j] += val[index];
- index++;
- }
-
- r2[i] = r[i]/size2;
- }
-
- return;
-}
-
-
-
-
-
-
-
-
-
-
-
-/*
- * NJ_init_vertex() -
- *
- * Construct a vertex, which we will use to construct our tree
- * in a true bottom-up approach. The vertex construct is
- * basically the center node in the initial star topology.
- *
- */
-NJ_VERTEX *
-NJ_init_vertex(DMAT *dmat) {
-
- long int i;
- NJ_VERTEX *vertex;
-
- /* allocate the vertex here */
- vertex = (NJ_VERTEX *)calloc(1, sizeof(NJ_VERTEX));
-
- /* allocate the nodes in the vertex */
- vertex->nodes = (NJ_TREE **)calloc(dmat->ntaxa, sizeof(NJ_TREE *));
- vertex->nodes_handle = vertex->nodes;
-
- /* initialize our size and active variables */
- vertex->nactive = dmat->ntaxa;
- vertex->size = dmat->ntaxa;
-
- /* initialize the nodes themselves */
- for(i=0;i<dmat->ntaxa;i++) {
-
- vertex->nodes[i] = (NJ_TREE *)calloc(1, sizeof(NJ_TREE));
-
- vertex->nodes[i]->left = NULL;
- vertex->nodes[i]->right = NULL;
-
- vertex->nodes[i]->taxa_index = i;
- }
-
- return(vertex);
-}
-
-
-
-
-
-/*
- * NJ_decompose() -
- *
- * This function decomposes the star by creating new internal nodes
- * and joining two existing tree nodes to it
- *
- */
-NJ_TREE *
-NJ_decompose(DMAT *dmat,
- NJ_VERTEX *vertex,
- long int x,
- long int y,
- int last_flag) {
-
- NJ_TREE *new_node;
- float x2clade, y2clade;
-
- /* compute the distance from the clade components to the new node */
- if(last_flag) {
- x2clade =
- (dmat->val[NJ_MAP(x, y, dmat->size)]);
- } else {
- x2clade =
- (dmat->val[NJ_MAP(x, y, dmat->size)])/2 +
- ((dmat->r2[x] - dmat->r2[y])/2);
- }
-
- vertex->nodes[x]->dist = x2clade;
-
- if(last_flag) {
- y2clade =
- (dmat->val[NJ_MAP(x, y, dmat->size)]);
- } else {
- y2clade =
- (dmat->val[NJ_MAP(x, y, dmat->size)])/2 +
- ((dmat->r2[y] - dmat->r2[x])/2);
- }
-
- vertex->nodes[y]->dist = y2clade;
-
- /* allocate new node to connect two sub-clades */
- new_node = (NJ_TREE *)calloc(1, sizeof(NJ_TREE));
-
- new_node->left = vertex->nodes[x];
- new_node->right = vertex->nodes[y];
- new_node->taxa_index = NJ_INTERNAL_NODE; /* this is not a terminal node, no taxa index */
-
- if(last_flag) {
- return(new_node);
- }
-
- vertex->nodes[x] = new_node;
- vertex->nodes[y] = vertex->nodes[0];
-
- vertex->nodes = &(vertex->nodes[1]);
-
- vertex->nactive--;
-
- return(new_node);
-}
-
-
-
-/*
- * NJ_print_vertex() -
- *
- * For debugging, print the contents of the vertex
- *
- */
-void
-NJ_print_vertex(NJ_VERTEX *vertex) {
-
- long int i;
-
- printf("Number of active nodes: %ld\n", vertex->nactive);
-
- for(i=0;i<vertex->nactive;i++) {
- printf("%ld ", vertex->nodes[i]->taxa_index);
- }
- printf("\n");
-
- return;
-}
-
-
-
-
-
-
-
-
-
-/*
- * NJ_print_r() -
- *
- */
-void
-NJ_print_r(DMAT *dmat) {
-
- long int i;
-
- printf("\n");
- for(i=0;i<dmat->size;i++) {
- printf("r[%ld] = %0.2f\n", i, dmat->r[i]);
- }
- printf("\n");
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_print_taxanames() -
- *
- * Print taxa names here
- *
- */
-void
-NJ_print_taxanames(DMAT *dmat) {
-
- long int i;
-
- printf("Number of taxa: %ld\n", dmat->ntaxa);
-
- for(i=0;i<dmat->ntaxa;i++) {
- printf("%ld) %s\n", i, dmat->taxaname[i]);
- }
-
- printf("\n");
-
- return;
-}
-
-
-
-
-/*
- * NJ_shuffle_distance_matrix() -
- *
- * Randomize a distance matrix here
- *
- */
-void
-NJ_shuffle_distance_matrix(DMAT *dmat) {
-
-
- long int *perm = NULL;
- char **tmp_taxaname = NULL;
- float *tmp_val = NULL;
- long int i, j;
-
-
- /* alloc the random permutation and a new matrix to hold the shuffled vals */
- perm = (long int *)calloc(dmat->size, sizeof(long int));
- tmp_taxaname = (char **)calloc(dmat->size, sizeof(char *));
- tmp_val = (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float));
- if(!tmp_taxaname || !perm || !tmp_val) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_shuffle_distance_matrix()\n");
- exit(-1);
- }
-
- /* compute a permutation which will describe how to shuffle the matrix */
- NJ_permute(perm, dmat->size);
-
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
-
- if(perm[j] < perm[i]) {
- tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[j], perm[i], dmat->size)];
- } else {
- tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[i], perm[j], dmat->size)];
- }
-
- }
-
- tmp_taxaname[i] = dmat->taxaname[perm[i]];
- }
-
- /* free our random permutation */
- if(perm) {
- free(perm);
- }
-
- /* free the old value matrix */
- if(dmat->val) {
- free(dmat->val);
- }
-
- /* re-assign the value matrix pointers */
- dmat->val = tmp_val;
- dmat->valhandle = dmat->val;
-
- /*
- * Free our old taxaname with its particular ordering
- * and re-assign to the new.
- */
- if(dmat->taxaname) {
- free(dmat->taxaname);
- }
- dmat->taxaname = tmp_taxaname;
-
- return;
-}
-
-
-
-/*
- * NJ_free_tree() -
- *
- * Free a given NJ tree
- */
-void
-NJ_free_tree(NJ_TREE *node) {
-
- if(!node) {
- return;
- }
-
- if(node->left) {
- NJ_free_tree(node->left);
- }
-
- if(node->right) {
- NJ_free_tree(node->right);
- }
-
- free(node);
-
- return;
-}
-
-
-
-
-
-
-
-
-
-/*
- * NJ_print_permutation()
- *
- * Print a permutation
- *
- */
-void
-NJ_print_permutation(long int *perm,
- long int size) {
-
- long int i;
-
- for(i=0;i<size-1;i++) {
- printf("%ld,", perm[i]);
- }
- printf("%ld\n", perm[size-1]);
-
- return;
-}
-
-
-
-
-/*
- * NJ_dup_dmat() -
- *
- * Duplicate a distance matrix
- *
- */
-DMAT *
-NJ_dup_dmat(DMAT *src) {
-
- long int i;
- DMAT *dest;
-
- /* allocate the resulting distance matrix */
- dest = (DMAT *)calloc(1, sizeof(DMAT));
- if(!dest) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
- goto XIT_BAD;
- }
-
- dest->ntaxa = src->ntaxa;
- dest->size = src->size;
-
- /* allocate space for array of pointers to taxanames */
- dest->taxaname = (char **)calloc(dest->ntaxa, sizeof(char *));
- if(!dest->taxaname) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
- goto XIT_BAD;
- }
-
- /* allocate space for the taxanames themselves */
- for(i=0;i<src->ntaxa;i++) {
- dest->taxaname[i] = (char *)calloc(strlen(src->taxaname[i])+1, sizeof(char));
- if(!dest->taxaname[i]) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
- goto XIT_BAD;
- }
- }
-
- /* allocate space for the distance values */
- dest->val = (float *)calloc(NJ_NCELLS(src->ntaxa), sizeof(float));
- if(!dest->val) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
- goto XIT_BAD;
- }
-
- /* allocate space for the r and r2 vectors */
- dest->r = (float *)calloc(src->ntaxa, sizeof(float));
- dest->r2 = (float *)calloc(src->ntaxa, sizeof(float));
-
- /* copy titles */
- for(i=0;i<src->ntaxa;i++) {
- strcpy(dest->taxaname[i], src->taxaname[i]);
- }
-
- /* copy values */
- memcpy(dest->val, src->valhandle, NJ_NCELLS(src->ntaxa)*sizeof(float));
-
- /* copy r and r2 */
- memcpy(dest->r, src->rhandle, src->ntaxa*sizeof(float));
- memcpy(dest->r2, src->r2handle, src->ntaxa*sizeof(float));
-
- /* track some memory addresses */
- dest->valhandle = dest->val;
- dest->rhandle = dest->r;
- dest->r2handle = dest->r2;
-
- return(dest);
-
- XIT_BAD:
-
- /* free what we may have allocated */
- NJ_free_dmat(dest);
-
- return(NULL);
-}
-
-
-
-
-/*
- * NJ_free_dmat() -
- */
-void
-NJ_free_dmat(DMAT *dmat) {
-
- long int i;
-
- if(dmat) {
-
- if(dmat->taxaname) {
-
- for(i=0;i<dmat->ntaxa;i++) {
- if(dmat->taxaname[i]) {
- free(dmat->taxaname[i]);
- }
- }
-
- free(dmat->taxaname);
- }
-
- if(dmat->valhandle) {
- free(dmat->valhandle);
- }
-
- if(dmat->rhandle) {
- free(dmat->rhandle);
- }
-
- if(dmat->r2handle) {
- free(dmat->r2handle);
- }
-
- free(dmat);
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_free_vertex() -
- *
- * Free the vertex data structure
- *
- */
-void
-NJ_free_vertex(NJ_VERTEX *vertex) {
-
- if(vertex) {
- if(vertex->nodes_handle) {
- free(vertex->nodes_handle);
- }
- free(vertex);
- }
-
- return;
-}
-
-
-
-
-
-
-
-
-
-/*
- *
- * NJ_min_transform() - Find the smallest transformed value to identify
- * which nodes to join.
- *
- * INPUTS:
- * -------
- * dmat -- The distance matrix
- *
- * RETURNS:
- * --------
- * <float> -- The minimimum transformed distance
- * ret_i -- The row of the smallest transformed distance (by reference)
- * ret_j -- The col of the smallest transformed distance (by reference)
- *
- *
- * DESCRIPTION:
- * ------------
- *
- * Used only with traditional Neighbor-Joining, this function checks the entire
- * working distance matrix and identifies the smallest transformed distance.
- * This requires traversing the entire diagonal matrix, which is itself a
- * O(N^2) operation.
- *
- */
-float
-NJ_min_transform(DMAT *dmat,
- long int *ret_i,
- long int *ret_j) {
-
- long int i, j; /* indices used for looping */
- long int tmp_i = 0;/* to limit pointer dereferencing */
- long int tmp_j = 0;/* to limit pointer dereferencing */
- float smallest; /* track the smallest trans. dist */
- float curval; /* the current trans. dist in loop */
-
- float *ptr; /* pointer into distance matrix */
- float *r2; /* pointer to r2 matrix for computing transformed dists */
-
- smallest = (float)HUGE_VAL;
-
- /* track these here to limit pointer dereferencing in inner loop */
- ptr = dmat->val;
- r2 = dmat->r2;
-
- /* for every row */
- for(i=0;i<dmat->size;i++) {
- ptr++; /* skip diagonal */
- for(j=i+1;j<dmat->size;j++) { /* for every column */
-
- /* find transformed distance in matrix at i, j */
- curval = *(ptr++) - (r2[i] + r2[j]);
-
- /* if the transformed distanance is less than the known minimum */
- if(curval < smallest) {
-
- smallest = curval;
- tmp_i = i;
- tmp_j = j;
- }
- }
- }
-
- /* pass back (by reference) the coords of the min. transformed distance */
- *ret_i = tmp_i;
- *ret_j = tmp_j;
-
- return(smallest); /* return the min transformed distance */
-}
-
-
-
-
-
--- /dev/null
+
+/*
+ * clearcut.c
+ *
+ * $Id$
+ *
+ *****************************************************************************
+ *
+ * Copyright (c) 2004, Luke Sheneman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + The names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************
+ *
+ * An implementation of the Relaxed Neighbor-Joining algorithm
+ * of Evans, J., Sheneman, L., and Foster, J.
+ *
+ *
+ * AUTHOR:
+ *
+ * Luke Sheneman
+ * sheneman@cs.uidaho.edu
+ *
+ */
+
+
+
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <sys/time.h>
+#include <float.h>
+
+#include "distclearcut.h"
+#include "dmat.h"
+#include "fasta.h"
+#include "cmdargs.h"
+#include "common.h"
+#include "clearcut.h"
+#include "prng.h"
+
+
+/*
+ * main() -
+ *
+ * The entry point to the program.
+ *
+ */
+int clearcut_main(int argc, char *argv[]) {
+
+ DMAT *dmat; /* The working distance matrix */
+ DMAT *dmat_backup = NULL;/* A backup distance matrix */
+ NJ_TREE *tree; /* The phylogenetic tree */
+ NJ_ARGS *nj_args; /* Structure for holding command-line arguments */
+ long int i;
+
+ /* some variables for tracking time */
+ struct timeval tv;
+ unsigned long long startUs, endUs;
+
+
+ /* check and parse supplied command-line arguments */
+ nj_args = NJ_handle_args(argc, argv);
+
+ if(!nj_args) {
+ fprintf(stderr, "Clearcut: Error processing command-line arguments.\n");
+ exit(-1);
+ }
+
+ /* for verbose reporting, print the random number seed to stdout */
+ if(nj_args->verbose_flag) {
+ printf("PRNG SEED: %d\n", nj_args->seed);
+ }
+
+ /* Initialize Mersenne Twister PRNG */
+ init_genrand(nj_args->seed);
+
+
+ switch(nj_args->input_mode) {
+
+ /* If the input type is a distance matrix */
+ case NJ_INPUT_MODE_DISTANCE:
+
+ /* parse the distance matrix */
+ dmat = NJ_parse_distance_matrix(nj_args);
+ if(!dmat) {
+ exit(-1);
+ }
+
+ break;
+
+ /* If the input type is a multiple sequence alignment */
+ case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
+
+ /* build a distance matrix from a multiple sequence alignment */
+ dmat = NJ_build_distance_matrix(nj_args);
+ if(!dmat) {
+ fprintf(stderr, "Clearcut: Failed to build distance matrix from alignment.\n");
+ exit(-1);
+ }
+
+ break;
+
+ default:
+
+ fprintf(stderr, "Clearcut: Could not determine how to process input\n");
+ exit(-1);
+ }
+
+ /*
+ * Output the computed distance matrix,
+ * if the user specified one.
+ */
+ if(nj_args->matrixout) {
+ NJ_output_matrix(nj_args, dmat);
+ }
+
+ /*
+ * If we are going to generate multiple trees from
+ * the same distance matrix, we need to make a backup
+ * of the original distance matrix.
+ */
+ if(nj_args->ntrees > 1) {
+ dmat_backup = NJ_dup_dmat(dmat);
+ }
+
+ /* process n trees */
+ for(i=0;i<nj_args->ntrees;i++) {
+
+ /*
+ * If the user has specified matrix shuffling, we need
+ * to randomize the distance matrix
+ */
+ if(nj_args->shuffle) {
+ NJ_shuffle_distance_matrix(dmat);
+ }
+
+ /* RECORD THE PRECISE TIME OF THE START OF THE NEIGHBOR-JOINING */
+ gettimeofday(&tv, NULL);
+ startUs = ((unsigned long long) tv.tv_sec * 1000000ULL)
+ + ((unsigned long long) tv.tv_usec);
+
+
+ /*
+ * Invoke either the Relaxed Neighbor-Joining algorithm (default)
+ * or the "traditional" Neighbor-Joining algorithm
+ */
+ if(nj_args->neighbor) {
+ tree = NJ_neighbor_joining(nj_args, dmat);
+ } else {
+ tree = NJ_relaxed_nj(nj_args, dmat);
+ }
+
+ if(!tree) {
+ fprintf(stderr, "Clearcut: Failed to construct tree.\n");
+ exit(0);
+ }
+
+ /* RECORD THE PRECISE TIME OF THE END OF THE NEIGHBOR-JOINING */
+ gettimeofday(&tv, NULL);
+ endUs = ((unsigned long long) tv.tv_sec * 1000000ULL)
+ + ((unsigned long long) tv.tv_usec);
+
+ /* print the time taken to perform the neighbor join */
+ if(nj_args->verbose_flag) {
+ if(nj_args->neighbor) {
+ fprintf(stderr, "NJ tree built in %llu.%06llu secs\n",
+ (endUs - startUs) / 1000000ULL,
+ (endUs - startUs) % 1000000ULL);
+ } else {
+ fprintf(stderr, "RNJ tree built in %llu.%06llu secs\n",
+ (endUs - startUs) / 1000000ULL,
+ (endUs - startUs) % 1000000ULL);
+ }
+ }
+
+ /* Output the neighbor joining tree here */
+ NJ_output_tree(nj_args, tree, dmat, i);
+
+ NJ_free_tree(tree); /* Free the tree */
+ NJ_free_dmat(dmat); /* Free the working distance matrix */
+
+ /*
+ * If we need to do another iteration, lets re-initialize
+ * our working distance matrix.
+ */
+ if(nj_args->ntrees > 1 && i<(nj_args->ntrees-1) ) {
+ dmat = NJ_dup_dmat(dmat_backup);
+ }
+ }
+
+ /* Free the backup distance matrix */
+ if(nj_args->ntrees > 1) {
+ NJ_free_dmat(dmat_backup);
+ }
+
+ /* If verbosity, describe where the tree output is */
+ if(nj_args->verbose_flag) {
+ if(nj_args->neighbor) {
+ printf("NJ tree(s) in %s\n", nj_args->outfilename);
+ } else {
+ printf("Relaxed NJ tree(s) in %s\n", nj_args->outfilename);
+ }
+ }
+
+ //exit(0);
+}
+
+
+
+
+
+/*
+ * NJ_find_hmin() - Find minimum transformed values along horizontal
+ *
+ *
+ * INPUTS:
+ * -------
+ * dmat -- The distance matrix
+ * a -- The index of the specific taxon in the distance matrix
+ *
+ * RETURNS:
+ * --------
+ * <float> -- The value of the selected minimum
+ * min -- Used to transport the index of the minima out
+ * of the function (by reference)
+ * hmincount -- Return the number of minima along the horizontal
+ * (by reference)
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * A fast, inline function to find the smallest transformed value
+ * along the "horizontal" portion of an entry in a distance matrix.
+ *
+ * Distance matrices are stored internally as continguously-allocated
+ * upper-diagonal structures. With the exception of the taxa at
+ * row 0 of this upper-diagonal matrix, all taxa have both a horizontal
+ * and vertical component in the distance matrix. This function
+ * scans the horizonal portion of the entry in the distance matrix
+ * for the specified taxon and finds the minimum transformed value
+ * along that horizontal component.
+ *
+ * Since multiple minima can exist along the horizontal portion
+ * of the entry, I consider all minima and break ties
+ * stochastically to help avoid systematic bias.
+ *
+ * Just searching along the horizontal portion of a row is very fast
+ * since the data is stored linearly and contiguously in memory and
+ * cache locality is exploited in the distance matrix representation.
+ *
+ * Look at nj.h for more information on how the distance matrix
+ * is architected.
+ *
+ */
+static inline
+float
+NJ_find_hmin(DMAT *dmat,
+ long int a,
+ long int *min,
+ long int *hmincount) {
+
+ long int i; /* index variable for looping */
+ int size; /* current size of distance matrix */
+ int mindex = 0; /* holds the current index to the chosen minimum */
+ float curval; /* used to hold current transformed values */
+ float hmin; /* the value of the transformed minimum */
+
+ float *ptr, *r2, *val; /* pointers used to reduce dereferencing in inner loop */
+
+ /* values used for stochastic selection among multiple minima */
+ float p, x;
+ long int smallcnt;
+
+ /* initialize the min to something large */
+ hmin = (float)HUGE_VAL;
+
+ /* setup some pointers to limit dereferencing later */
+ r2 = dmat->r2;
+ val = dmat->val;
+ size = dmat->size;
+
+ /* initialize values associated with minima tie breaking */
+ p = 1.0;
+ smallcnt = 0;
+
+
+ ptr = &(val[NJ_MAP(a, a+1, size)]); /* at the start of the horiz. part */
+ for(i=a+1;i<size;i++) {
+
+ curval = *(ptr++) - (r2[a] + r2[i]); /* compute transformed distance */
+
+ if(NJ_FLT_EQ(curval, hmin)) { /* approx. equal */
+
+ smallcnt++;
+
+ p = 1.0/(float)smallcnt;
+ x = genrand_real2();
+
+ /* select this minimum in a way which is proportional to
+ the number of minima found along the row so far */
+ if( x < p ) {
+ mindex = i;
+ }
+
+ } else if (curval < hmin) {
+
+ smallcnt = 1;
+ hmin = curval;
+ mindex = i;
+ }
+ }
+
+ /* save off the the minimum index to be returned via reference */
+ *min = mindex;
+
+ /* save off the number of minima */
+ *hmincount = smallcnt;
+
+ /* return the value of the smallest tranformed distance */
+ return(hmin);
+}
+
+
+
+
+
+
+
+
+/*
+ * NJ_find_vmin() - Find minimum transformed distance along vertical
+ *
+ *
+ * INPUTS:
+ * -------
+ * dmat -- The distance matrix
+ * a -- The index of the specific taxon in the distance matrix
+ *
+ *
+ * RETURNS:
+ * --------
+ * <float> -- The value of the selected minimum
+ * min -- Used to transport the index of the minima out
+ * of the function (by reference)
+ * vmincount -- The number of minima along the vertical
+ * return by reference.
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * A fast, inline function to find the smallest transformed value
+ * along the "vertical" portion of an entry in a distance matrix.
+ *
+ * Distance matrices are stored internally as continguously-allocated
+ * upper-diagonal matrices. With the exception of the taxa at
+ * row 0 of this upper-diagonal matrix, all taxa have both a horizontal
+ * and vertical component in the distance matrix. This function
+ * scans the vertical portion of the entry in the distance matrix
+ * for the specified taxon and finds the minimum transformed value
+ * along that vertical component.
+ *
+ * Since multiple minima can exist along the vertical portion
+ * of the entry, I consider all minima and break ties
+ * stochastically to help avoid systematic bias.
+ *
+ * Due to cache locality reasons, searching along the vertical
+ * component is going to be considerably slower than searching
+ * along the horizontal.
+ *
+ * Look at nj.h for more information on how the distance matrix
+ * is architected.
+ *
+ */
+static inline
+float
+NJ_find_vmin(DMAT *dmat,
+ long int a,
+ long int *min,
+ long int *vmincount) {
+
+ long int i; /* index variable used for looping */
+ long int size; /* track the size of the matrix */
+ long int mindex = 0;/* track the index to the minimum */
+ float curval; /* track value of current transformed distance */
+ float vmin; /* the index to the smallest "vertical" minimum */
+
+ /* pointers which are used to reduce pointer dereferencing in inner loop */
+ float *ptr, *r2, *val;
+
+ /* values used in stochastically breaking ties */
+ float p, x;
+ long int smallcnt;
+
+ /* initialize the vertical min to something really big */
+ vmin = (float)HUGE_VAL;
+
+ /* save off some values to limit dereferencing later */
+ r2 = dmat->r2;
+ val = dmat->val;
+ size = dmat->size;
+
+ p = 1.0;
+ smallcnt = 0;
+
+ /* start on the first row and work down */
+ ptr = &(val[NJ_MAP(0, a, size)]);
+ for(i=0;i<a;i++) {
+
+ curval = *ptr - (r2[i] + r2[a]); /* compute transformed distance */
+
+ if(NJ_FLT_EQ(curval, vmin)) { /* approx. equal */
+
+ smallcnt++;
+
+ p = 1.0/(float)smallcnt;
+ x = genrand_real2();
+
+ /* break ties stochastically to avoid systematic bias */
+ if( x < p ) {
+ mindex = i;
+ }
+
+ } else if (curval < vmin) {
+
+ smallcnt = 1;
+ vmin = curval;
+ mindex = i;
+ }
+
+ /* increment our working pointer to the next row down */
+ ptr += size-i-1;
+ }
+
+ /* pass back the index to the minimum found so far (by reference) */
+ *min = mindex;
+
+ /* pass back the number of minima along the vertical */
+ *vmincount = smallcnt;
+
+ /* return the value of the smallest transformed distance */
+ return(vmin);
+}
+
+
+
+
+/*
+ * NJ_permute() - Generate random permutation using the provably unbiased
+ * Fisher-Yates Shuffle.
+ *
+ * INPUTS:
+ * -------
+ * perm -- A pointer to the array of long ints which will be filled.
+ * size -- the length of the permutation vector
+ *
+ *
+ * OUTPUTS:
+ * -------
+ * NONE
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * Return a permuted list of numbers from 0 through size.
+ * This is accomplished by initializing the permutation
+ * as an ordered list of integers and then iterating
+ * through and swapping two values selected according to the
+ * Fisher-Yates method.
+ *
+ * This unbiased method for random permutation generation is
+ * discussed in:
+ *
+ * Donald E. Knuth, The Art of Computer Programming,
+ * Addison-Wesley, Volumes 1, 2, and 3, 3rd edition, 1998
+ *
+ */
+static inline
+void
+NJ_permute(long int *perm,
+ long int size) {
+
+ long int i; /* index used for looping */
+ long int swap; /* we swap values to generate permutation */
+ long int tmp; /* used for swapping values */
+
+
+ /* check to see if vector of long ints is valid */
+ if(!perm) {
+ fprintf(stderr, "Clearcut: NULL permutation pointer in NJ_permute()\n");
+ exit(-1);
+ }
+
+ /* init permutation as an ordered list of integers */
+ for(i=0;i<size;i++) {
+ perm[i] = i;
+ }
+
+ /*
+ * Iterate across the array from i = 0 to size -1, swapping ith element
+ * with a randomly chosen element from a changing range of possible values
+ */
+ for(i=0;i<size;i++) {
+
+ /* choose which element we will swap with */
+ swap = i + NJ_genrand_int31_top(size-i);
+
+ /* swap elements here */
+ if(i != swap) {
+ tmp = perm[swap];
+ perm[swap] = perm[i];
+ perm[i] = tmp;
+ }
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_compute_r() - Compute post-join changes to r-vector. In this
+ * case, we decrement all of the accumulated distances
+ * in the r-vector for the two nodes that were
+ * recently joined (i.e. x, y)
+ *
+ * INPUTS:
+ * -------
+ * dmat -- The distance matrix
+ * a -- The index of one of the taxa that were joined
+ * b -- The index of the other taxa that was joined
+ *
+ * RETURNS:
+ * --------
+ * NONE
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * This vector of floats is used as a summary of overall distances from
+ * each entry in the distance matrix to every other entry. These values
+ * are then used when computing the transformed distances from which
+ * decisions concerning joining are made.
+ *
+ * For speed, we don't recompute r from scratch. Instead, we decrement
+ * all entries in r by the appropriate amount. That is, r[i] -= dist(i, a)
+ * and r[i] -= dist(i, b).
+ *
+ * As a speed optimization, I process the rows altogether for cache locality
+ * purposes, and then process columns.
+ *
+ * The processing of the scaled r matrix (r2) is handled on-the-fly elsewhere.
+ *
+ */
+static inline
+void
+NJ_compute_r(DMAT *dmat,
+ long int a,
+ long int b) {
+
+ long int i; /* a variable used in indexing */
+ float *ptrx, *ptry; /* pointers into the distance matrix */
+
+ /* some variables to limit pointer dereferencing in loop */
+ long int size;
+ float *r, *val;
+
+ /* to limit pointer dereferencing */
+ size = dmat->size;
+ val = dmat->val;
+ r = dmat->r+a+1;
+
+ /*
+ * Loop through the rows and decrement the stored r values
+ * by the distances stored in the rows and columns of the distance
+ * matrix which are being removed post-join.
+ *
+ * We do the rows altogether in order to benefit from cache locality.
+ */
+ ptrx = &(val[NJ_MAP(a, a+1, size)]);
+ ptry = &(val[NJ_MAP(b, b+1, size)]);
+
+ for(i=a+1;i<size;i++) {
+ *r -= *(ptrx++);
+
+ if(i>b) {
+ *r -= *(ptry++);
+ }
+
+ r++;
+ }
+
+ /* Similar to the above loop, we now do the columns */
+ ptrx = &(val[NJ_MAP(0, a, size)]);
+ ptry = &(val[NJ_MAP(0, b, size)]);
+ r = dmat->r;
+ for(i=0;i<b;i++) {
+ if(i<a) {
+ *r -= *ptrx;
+ ptrx += size-i-1;
+ }
+
+ *r -= *ptry;
+ ptry += size-i-1;
+ r++;
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_check_additivity() - Check to make sure that addivity preserved by join
+ *
+ *
+ * INPUTS:
+ * -------
+ * dmat -- distance matrix
+ * a -- index into dmat for one of the rows to be joined
+ * b -- index into dmat for another row to be joined
+ *
+ * OUTPUTS:
+ * --------
+ * int 1 if join adheres to additivity constraint
+ * 0 if join does breaks additivity
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * Here we perform the check to make sure that by joining a and b we do not
+ * also break consistency (i.e. preserves additivity) with the distances between
+ * the taxa in the new clade and other nodes in the tree. This is done quite
+ * efficiently by looking up the untransformed distance between node b and
+ * some other "target" taxa in the distance matrix (which is not a nor b) and
+ * comparing that distance to the distance computed by finding the distance
+ * from node a to the proposed internal node "x" which joins (a,b).
+ *
+ * If dist(x,b) + dist (b, target) == dist(b, target) then additivity is
+ * preserved, otherwise, additivity is not preserved. If we are in
+ * additivity mode, this join should be rejected.
+ *
+ */
+static inline
+int
+NJ_check_additivity(DMAT *dmat,
+ long int a,
+ long int b) {
+
+ float a2clade, b2clade;
+ float clade_dist;
+ long int target;
+
+
+ /* determine target taxon here */
+ if(b == dmat->size-1) {
+ /* if we can't do a row here, lets do a column */
+ if(a==0) {
+ if(b==1) {
+ target = 2;
+ } else {
+ target = 1;
+ }
+ } else {
+ target = 0;
+ }
+ } else {
+ target = b+1;
+ }
+
+
+ /* distance between a and the root of clade (a,b) */
+ a2clade =
+ ( (dmat->val[NJ_MAP(a, b, dmat->size)]) +
+ (dmat->r2[a] - dmat->r2[b]) ) / 2.0;
+
+ /* distance between b and the root of clade (a,b) */
+ b2clade =
+ ( (dmat->val[NJ_MAP(a, b, dmat->size)]) +
+ (dmat->r2[b] - dmat->r2[a]) ) / 2.0;
+
+ /* distance between the clade (a,b) and the target taxon */
+ if(b<target) {
+
+ /* compute the distance from the clade root to the target */
+ clade_dist =
+ ( (dmat->val[NJ_MAP(a, target, dmat->size)] - a2clade) +
+ (dmat->val[NJ_MAP(b, target, dmat->size)] - b2clade) ) / 2.0;
+
+ /*
+ * Check to see that distance from clade root to target + distance from
+ * b to clade root are equal to the distance from b to the target
+ */
+ if(NJ_FLT_EQ(dmat->val[NJ_MAP(b, target, dmat->size)],
+ (clade_dist + b2clade))) {
+ return(1); /* join is legitimate */
+ } else {
+ return(0); /* join is illigitimate */
+ }
+
+ } else {
+
+ /* compute the distance from the clade root to the target */
+ clade_dist =
+ ( (dmat->val[NJ_MAP(target, a, dmat->size)] - a2clade) +
+ (dmat->val[NJ_MAP(target, b, dmat->size)] - b2clade) ) / 2.0;
+
+ /*
+ * Check to see that distance from clade root to target + distance from
+ * b to clade root are equal to the distance from b to the target
+ */
+ if(NJ_FLT_EQ(dmat->val[NJ_MAP(target, b, dmat->size)],
+ (clade_dist + b2clade))) {
+ return(1); /* join is legitimate */
+ } else {
+ return(0); /* join is illegitimate */
+ }
+ }
+}
+
+
+
+
+
+
+
+/*
+ * NJ_check() - Check to see if two taxa can be joined
+
+ *
+ * INPUTS:
+ * -------
+ * nj_args -- Pointer to the data structure holding command-line args
+ * dmat -- distance matrix
+ * a -- index into dmat for one of the rows to be joined
+ * b -- index into dmat for another row to be joined
+ * min -- the minimum value found
+ * additivity -- a flag (0 = not additive mode, 1 = additive mode)
+ *
+ * OUTPUTS:
+ * --------
+ * int 1 if join is okay
+ * 0 if join is not okay
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * This function ultimately takes two rows and makes sure that the
+ * intersection of those two rows, which has a transformed distance of
+ * "min", is actually the smallest (or equal to the smallest)
+ * transformed distance for both rows (a, b). If so, it returns
+ * 1, else it returns 0.
+ *
+ * Basically, we want to join two rows only if the minimum
+ * transformed distance on either row is at the intersection of
+ * those two rows.
+ *
+ */
+static inline
+int
+NJ_check(NJ_ARGS *nj_args,
+ DMAT *dmat,
+ long int a,
+ long int b,
+ float min,
+ int additivity) {
+
+
+ long int i, size;
+ float *ptr, *val, *r2;
+
+
+ /* some aliases for speed and readability reasons */
+ val = dmat->val;
+ r2 = dmat->r2;
+ size = dmat->size;
+
+
+ /* now determine if joining a, b will result in broken distances */
+ if(additivity) {
+ if(!NJ_check_additivity(dmat, a, b)) {
+ return(0);
+ }
+ }
+
+ /* scan the horizontal of row b, punt if anything < min */
+ ptr = &(val[NJ_MAP(b, b+1, size)]);
+ for(i=b+1;i<size;i++) {
+ if( NJ_FLT_LT( (*ptr - (r2[b] + r2[i])), min) ) {
+ return(0);
+ }
+ ptr++;
+ }
+
+ /* scan the vertical component of row a, punt if anything < min */
+ if(nj_args->norandom) { /* if we are doing random joins, we checked this */
+ ptr = val + a;
+ for(i=0;i<a;i++) {
+ if( NJ_FLT_LT( (*ptr - (r2[i] + r2[a])), min) ) {
+ return(0);
+ }
+ ptr += size-i-1;
+ }
+ }
+
+ /* scan the vertical component of row b, punt if anything < min */
+ ptr = val + b;
+ for(i=0;i<b;i++) {
+ if( NJ_FLT_LT( (*ptr - (r2[i] + r2[b])), min) && i!=a) {
+ return(0);
+ }
+ ptr += size-i-1;
+ }
+
+ return(1);
+}
+
+
+
+
+
+
+
+/*
+ * NJ_collapse() - Collapse the distance matrix by removing
+ * rows a and b from the distance matrix and
+ * replacing them with a single new row which
+ * represents the internal node joining a and b
+ *
+ *
+ * INPUTS:
+ * -------
+ * dmat -- A pointer to the distance matrix
+ * vertex -- A pointer to the vertex vector (vector of tree nodes)
+ * which is used in constructing the tree
+ * a -- An index to a row in the distance matrix from which we
+ * joined. This row will be collapsed.
+ * b -- An index to a row in the distance matrix from which we
+ * joined. This row will be collapsed.
+ *
+ * RETURNS:
+ * --------
+ * NONE
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * This function collapses the distance matrix in a way which optimizes
+ * cache locality and ultimately gives us a speed improvement due to
+ * cache. At this point, we've decided to join rows a and b from
+ * the distance matrix. We will remove rows a and b from the distance
+ * matrix and replace them with a new row which represents the internal
+ * node which joins rows a and b together.
+ *
+ * We always keep the matrix as compact as possible in order to
+ * get good performance from our cache in subsequent operations. Cache
+ * is the key to good performance here.
+ *
+ * Key Steps:
+ * ----------
+ *
+ * 1) Fill the "a" row with the new distances of the internal node
+ * joining a and b to all other rows.
+ * 2) Copy row 0 into what was row b
+ * 3) Increment the pointer to the start of the distance matrix
+ * by one row.
+ * 4) Decrement the size of the matrix by one row.
+ * 5) Do roughly the same thing to the r vector in order to
+ * keep it in sync with the distance matrix.
+ * 6) Compute the scaled r vector (r2) based on the updated
+ * r vector
+ *
+ * This keeps the distance matrix as compact as possible in memory, and
+ * is a relatively fast operation.
+ *
+ * This function requires that a < b
+ *
+ */
+static inline
+void
+NJ_collapse(DMAT *dmat,
+ NJ_VERTEX *vertex,
+ long int a,
+ long int b) {
+
+
+ long int i; /* index used for looping */
+ long int size; /* size of dmat --> reduce pointer dereferencing */
+ float a2clade; /* distance from a to the new node that joins a and b */
+ float b2clade; /* distance from b to the new node that joins a and b */
+ float cval; /* stores distance information during loop */
+ float *vptr; /* pointer to elements in first row of dist matrix */
+ float *ptra; /* pointer to elements in row a of distance matrix */
+ float *ptrb; /* pointer to elements in row b of distance matrix */
+
+ float *val, *r, *r2; /* simply used to limit pointer dereferencing */
+
+
+ /* We must assume that a < b */
+ if(a >= b) {
+ fprintf(stderr, "Clearcut: (a<b) constraint check failed in NJ_collapse()\n");
+ exit(0);
+ }
+
+ /* some shortcuts to help limit dereferencing */
+ val = dmat->val;
+ r = dmat->r;
+ r2 = dmat->r2;
+ size = dmat->size;
+
+ /* compute the distance from the clade components (a, b) to the new node */
+ a2clade =
+ ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[a] - dmat->r2[b]) ) / 2.0;
+ b2clade =
+ ( (val[NJ_MAP(a, b, size)]) + (dmat->r2[b] - dmat->r2[a]) ) / 2.0;
+
+
+ r[a] = 0.0; /* we are removing row a, so clear dist. in r */
+
+ /*
+ * Fill the horizontal part of the "a" row and finish computing r and r2
+ * we handle the horizontal component first to maximize cache locality
+ */
+ ptra = &(val[NJ_MAP(a, a+1, size)]); /* start ptra at the horiz. of a */
+ ptrb = &(val[NJ_MAP(a+1, b, size)]); /* start ptrb at comparable place */
+ for(i=a+1;i<size;i++) {
+
+ /*
+ * Compute distance from new internal node to others in
+ * the distance matrix.
+ */
+ cval =
+ ( (*ptra - a2clade) +
+ (*ptrb - b2clade) ) / 2.0;
+
+ /* incr. row b pointer differently depending on where i is in loop */
+ if(i<b) {
+ ptrb += size-i-1; /* traverse vertically by incrementing by row */
+ } else {
+ ptrb++; /* traverse horiz. by incrementing by column */
+ }
+
+ /* assign the newly computed distance and increment a ptr by a column */
+ *(ptra++) = cval;
+
+ /* accumulate the distance onto the r vector */
+ r[a] += cval;
+ r[i] += cval;
+
+ /* scale r2 on the fly here */
+ r2[i] = r[i]/(float)(size-3);
+ }
+
+ /* fill the vertical part of the "a" column and finish computing r and r2 */
+ ptra = val + a; /* start at the top of the columb for "a" */
+ ptrb = val + b; /* start at the top of the columb for "b" */
+ for(i=0;i<a;i++) {
+
+ /*
+ * Compute distance from new internal node to others in
+ * the distance matrix.
+ */
+ cval =
+ ( (*ptra - a2clade) +
+ (*ptrb - b2clade) ) / 2.0;
+
+ /* assign the newly computed distance and increment a ptr by a column */
+ *ptra = cval;
+
+ /* accumulate the distance onto the r vector */
+ r[a] += cval;
+ r[i] += cval;
+
+ /* scale r2 on the fly here */
+ r2[i] = r[i]/(float)(size-3);
+
+ /* here, always increment by an entire row */
+ ptra += size-i-1;
+ ptrb += size-i-1;
+ }
+
+
+ /* scale r2 on the fly here */
+ r2[a] = r[a]/(float)(size-3);
+
+
+
+ /*
+ * Copy row 0 into row b. Again, the code is structured into two
+ * loops to maximize cache locality for writes along the horizontal
+ * component of row b.
+ */
+ vptr = val;
+ ptrb = val + b;
+ for(i=0;i<b;i++) {
+ *ptrb = *(vptr++);
+ ptrb += size-i-1;
+ }
+ vptr++; /* skip over the diagonal */
+ ptrb = &(val[NJ_MAP(b, b+1, size)]);
+ for(i=b+1;i<size;i++) {
+ *(ptrb++) = *(vptr++);
+ }
+
+ /*
+ * Collapse r here by copying contents of r[0] into r[b] and
+ * incrementing pointer to the beginning of r by one row
+ */
+ r[b] = r[0];
+ dmat->r = r+1;
+
+
+ /*
+ * Collapse r2 here by copying contents of r2[0] into r2[b] and
+ * incrementing pointer to the beginning of r2 by one row
+ */
+ r2[b] = r2[0];
+ dmat->r2 = r2+1;
+
+ /* increment dmat pointer to next row */
+ dmat->val += size;
+
+ /* decrement the total size of the distance matrix by one row */
+ dmat->size--;
+
+ return;
+}
+
+
+
+
+
+
+
+
+
+/*
+ * NJ_neighbor_joining() - Perform a traditional Neighbor-Joining
+ *
+ *
+ * INPUTS:
+ * -------
+ * nj_args -- A pointer to a structure containing the command-line arguments
+ * dmat -- A pointer to the distance matrix
+ *
+ * RETURNS:
+ * --------
+ * NJ_TREE * -- A pointer to the Neighbor-Joining tree.
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * This function performs a traditional Neighbor-Joining operation in which
+ * the distance matrix is exhaustively searched for the global minimum
+ * transformed distance. The two nodes which intersect at the global
+ * minimum transformed distance are then joined and the distance
+ * matrix is collapsed. This process continues until there are only
+ * two nodes left, at which point those nodes are joined.
+ *
+ */
+NJ_TREE *
+NJ_neighbor_joining(NJ_ARGS *nj_args,
+ DMAT *dmat) {
+
+
+ NJ_TREE *tree = NULL;
+ NJ_VERTEX *vertex = NULL;
+
+ long int a, b;
+ float min;
+
+
+ /* initialize the r and r2 vectors */
+ NJ_init_r(dmat);
+
+ /* allocate and initialize our vertex vector used for tree construction */
+ vertex = NJ_init_vertex(dmat);
+ if(!vertex) {
+ fprintf(stderr, "Clearcut: Could not initialize vertex in NJ_neighbor_joining()\n");
+ return(NULL);
+ }
+
+ /* we iterate until the working distance matrix has only 2 entries */
+ while(vertex->nactive > 2) {
+
+ /*
+ * Find the global minimum transformed distance from the distance matrix
+ */
+ min = NJ_min_transform(dmat, &a, &b);
+
+ /*
+ * Build the tree by removing nodes a and b from the vertex array
+ * and inserting a new internal node which joins a and b. Collapse
+ * the vertex array similarly to how the distance matrix and r and r2
+ * are compacted.
+ */
+ NJ_decompose(dmat, vertex, a, b, 0);
+
+ /* decrement the r and r2 vectors by the distances corresponding to a, b */
+ NJ_compute_r(dmat, a, b);
+
+ /* compact the distance matrix and the r and r2 vectors */
+ NJ_collapse(dmat, vertex, a, b);
+ }
+
+ /* Properly join the last two nodes on the vertex list */
+ tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST);
+
+ /* return the computed tree to the calling function */
+ return(tree);
+}
+
+
+
+
+
+
+
+
+/*
+ * NJ_relaxed_nj() - Construct a tree using the Relaxed Neighbor-Joining
+ *
+ * INPUTS:
+ * -------
+ * nj_args -- A pointer to a data structure containing the command-line args
+ * dmat -- A pointer to the distance matrix
+ *
+ * RETURNS:
+ * --------
+ *
+ * NJ_TREE * -- A pointer to a Relaxed Neighbor-Joining tree
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * This function implements the Relaxed Neighbor-Joining algorithm of
+ * Evans, J., Sheneman, L., and Foster, J.
+ *
+ * Relaxed Neighbor-Joining works by choosing a local minimum transformed
+ * distance when determining when to join two nodes. (Traditional
+ * Neighbor-Joining chooses a global minimum transformed distance).
+ *
+ * The algorithm shares the property with traditional NJ that if the
+ * input distances are additive (self-consistent), then the algorithm
+ * will manage to construct the true tree consistent with the additive
+ * distances. Additivity state is tracked and every proposed join is checked
+ * to make sure it maintains additivity constraints. If no
+ * additivity-preserving join is possible in a single pass, then the distance
+ * matrix is non-additive, and additivity checking is abandoned.
+ *
+ * The algorithm will either attempt joins randomly, or it will perform joins
+ * in a particular order. The default behavior is to perform joins randomly,
+ * but this can be switched off with a command-line switch.
+ *
+ * For randomized joins, all attempts are made to alleviate systematic bias
+ * for the choice of rows to joins. All tie breaking is done in a way which
+ * is virtually free of bias.
+ *
+ * To perform randomized joins, a random permutation is constructed which
+ * specifies the order in which to attempt joins. I iterate through the
+ * random permutation, and for each row in the random permutation, I find
+ * the minimum transformed distance for that row. If there are multiple
+ * minima, I break ties evenly. For the row which intersects our
+ * randomly chosen row at the chosen minimum, if we are are still in
+ * additivity mode, I check to see if joining the two rows will break
+ * our additivity constraints. If not, I check to see if there exists
+ * a transformed distance which is smaller than the minimum found on the
+ * original row. If there is, then we proceed through the random permutation
+ * trying additional rows in the random order specified in the permutation.
+ * If there is no smaller minimum transformed distance on either of the
+ * two rows, then we join them, collapse the distance matrix, and compute
+ * a new random permutation.
+ *
+ * If the entire random permutation is traversed and no joins are possible
+ * due to additivity constraints, then the distance matrix is not
+ * additive, and additivity constraint-checking is disabled.
+ *
+ */
+NJ_TREE *
+NJ_relaxed_nj(NJ_ARGS *nj_args,
+ DMAT *dmat) {
+
+
+ NJ_TREE *tree;
+ NJ_VERTEX *vertex;
+ long int a, b, t, bh, bv, i;
+ float hmin, vmin, hvmin;
+ float p, q, x;
+ int join_flag;
+ int additivity_mode;
+ long int hmincount, vmincount;
+ long int *permutation = NULL;
+
+
+
+ /* initialize the r and r2 vectors */
+ NJ_init_r(dmat);
+
+ additivity_mode = 1;
+
+ /* allocate the permutation vector, if we are in randomize mode */
+ if(!nj_args->norandom) {
+ permutation = (long int *)calloc(dmat->size, sizeof(long int));
+ if(!permutation) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_relaxed_nj()\n");
+ return(NULL);
+ }
+ }
+
+ /* allocate and initialize our vertex vector used for tree construction */
+ vertex = NJ_init_vertex(dmat);
+
+ /* loop until there are only 2 nodes left to join */
+ while(vertex->nactive > 2) {
+
+ switch(nj_args->norandom) {
+
+ /* RANDOMIZED JOINS */
+ case 0:
+
+ join_flag = 0;
+
+ NJ_permute(permutation, dmat->size-1);
+ for(i=0;i<dmat->size-1 && (vertex->nactive>2) ;i++) {
+
+ a = permutation[i];
+
+ /* find min trans dist along horiz. of row a */
+ hmin = NJ_find_hmin(dmat, a, &bh, &hmincount);
+ if(a) {
+ /* find min trans dist along vert. of row a */
+ vmin = NJ_find_vmin(dmat, a, &bv, &vmincount);
+ } else {
+ vmin = hmin;
+ bv = bh;
+ vmincount = 0;
+ }
+
+ if(NJ_FLT_EQ(hmin, vmin)) {
+
+ /*
+ * The minima along the vertical and horizontal are
+ * the same. Compute the proportion of minima along
+ * the horizonal (p) and the proportion of minima
+ * along the vertical (q).
+ *
+ * If the same minima exist along the horizonal and
+ * vertical, we break the tie in a way which is
+ * non-biased. That is, we break the tie based on the
+ * proportion of horiz. minima versus vertical minima.
+ *
+ */
+ p = (float)hmincount / ((float)hmincount + (float)vmincount);
+ q = 1.0 - p;
+ x = genrand_real2();
+
+ if(x < p) {
+ hvmin = hmin;
+ b = bh;
+ } else {
+ hvmin = vmin;
+ b = bv;
+ }
+ } else if(NJ_FLT_LT(hmin, vmin) ) {
+ hvmin = hmin;
+ b = bh;
+ } else {
+ hvmin = vmin;
+ b = bv;
+ }
+
+ if(NJ_check(nj_args, dmat, a, b, hvmin, additivity_mode)) {
+
+ /* swap a and b, if necessary, to make sure a < b */
+ if(b < a) {
+ t = a;
+ a = b;
+ b = t;
+ }
+
+ join_flag = 1;
+
+ /* join taxa from rows a and b */
+ NJ_decompose(dmat, vertex, a, b, 0);
+
+ /* collapse matrix */
+ NJ_compute_r(dmat, a, b);
+ NJ_collapse(dmat, vertex, a, b);
+
+ NJ_permute(permutation, dmat->size-1);
+ }
+ }
+
+ /* turn off additivity if go through an entire cycle without joining */
+ if(!join_flag) {
+ additivity_mode = 0;
+ }
+
+ break;
+
+
+
+ /* DETERMINISTIC JOINS */
+ case 1:
+
+ join_flag = 0;
+
+ for(a=0;a<dmat->size-1 && (vertex->nactive > 2) ;) {
+
+ /* find the min along the horizontal of row a */
+ hmin = NJ_find_hmin(dmat, a, &b, &hmincount);
+
+ if(NJ_check(nj_args, dmat, a, b, hmin, additivity_mode)) {
+
+ join_flag = 1;
+
+ /* join taxa from rows a and b */
+ NJ_decompose(dmat, vertex, a, b, 0);
+
+ /* collapse matrix */
+ NJ_compute_r(dmat, a, b);
+ NJ_collapse(dmat, vertex, a, b);
+
+ if(a) {
+ a--;
+ }
+
+ } else {
+ a++;
+ }
+ }
+
+ /* turn off additivity if go through an entire cycle without joining */
+ if(!join_flag) {
+ additivity_mode = 0;
+ }
+
+ break;
+ }
+
+ } /* WHILE */
+
+ /* Join the last two nodes on the vertex list */
+ tree = NJ_decompose(dmat, vertex, 0, 1, NJ_LAST);
+
+ if(nj_args->verbose_flag) {
+ if(additivity_mode) {
+ printf("Tree is additive\n");
+ } else {
+ printf("Tree is not additive\n");
+ }
+ }
+
+ if(vertex) {
+ NJ_free_vertex(vertex);
+ }
+
+ if(!nj_args->norandom && permutation) {
+ free(permutation);
+ }
+
+ return(tree);
+}
+
+
+
+
+
+
+/*
+ * NJ_print_distance_matrix() -
+ *
+ * Print a distance matrix
+ *
+ */
+void
+NJ_print_distance_matrix(DMAT *dmat) {
+
+ long int i, j;
+
+ printf("ntaxa: %ld\n", dmat->ntaxa);
+ printf(" size: %ld\n", dmat->size);
+
+ for(i=0;i<dmat->size;i++) {
+
+ for(j=0;j<dmat->size;j++) {
+ if(j>i) {
+ printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)]);
+ } else {
+ printf(" -");
+ }
+ }
+
+
+ if(dmat->r && dmat->r2) {
+ printf("\t\t%0.4f", dmat->r[i]);
+ printf("\t%0.4f", dmat->r2[i]);
+
+
+
+ printf("\n");
+
+ for(j=0;j<dmat->size;j++) {
+ if(j>i) {
+ printf(" %0.4f", dmat->val[NJ_MAP(i, j, dmat->size)] - (dmat->r2[i] + dmat->r2[j]));
+ } else {
+ printf(" ");
+ }
+ }
+
+ printf("\n\n");
+ }
+ }
+
+ printf("\n");
+
+ return;
+}
+
+
+
+
+
+
+
+/*
+ * NJ_output_tree() -
+ *
+ * A wrapper for the function that really prints the tree,
+ * basically to get a newline in there conveniently. :-)
+ *
+ * Print n trees, as specified in command-args
+ * using "count" variable from 0 to (n-1)
+ *
+ */
+void
+NJ_output_tree(NJ_ARGS *nj_args,
+ NJ_TREE *tree,
+ DMAT *dmat,
+ long int count) {
+
+ FILE *fp;
+
+ if(nj_args->stdout_flag) {
+ fp = stdout;
+ } else {
+
+ if(count == 0) {
+ fp = fopen(nj_args->outfilename, "w"); /* open for writing */
+ } else {
+ fp = fopen(nj_args->outfilename, "a"); /* open for appending */
+ }
+
+ if(!fp) {
+ fprintf(stderr, "Clearcut: Failed to open outfile %s\n", nj_args->outfilename);
+ exit(-1);
+ }
+ }
+
+ NJ_output_tree2(fp, nj_args, tree, tree, dmat);
+ fprintf(fp, ";\n");
+
+ if(!nj_args->stdout_flag) {
+ fclose(fp);
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_output_tree2() -
+ *
+ *
+ */
+void
+NJ_output_tree2(FILE *fp,
+ NJ_ARGS *nj_args,
+ NJ_TREE *tree,
+ NJ_TREE *root,
+ DMAT *dmat) {
+
+ if(!tree) {
+ return;
+ }
+
+ if(tree->taxa_index != NJ_INTERNAL_NODE) {
+
+ if(nj_args->expblen) {
+ fprintf(fp, "%s:%e",
+ dmat->taxaname[tree->taxa_index],
+ tree->dist);
+ } else {
+ fprintf(fp, "%s:%f",
+ dmat->taxaname[tree->taxa_index],
+ tree->dist);
+ }
+
+ } else {
+
+
+ if(tree->left && tree->right) {
+ fprintf(fp, "(");
+ }
+ if(tree->left) {
+ NJ_output_tree2(fp, nj_args, tree->left, root, dmat);
+ }
+
+ if(tree->left && tree->right) {
+ fprintf(fp, ",");
+ }
+ if(tree->right) {
+ NJ_output_tree2(fp, nj_args, tree->right, root, dmat);
+ }
+
+ if(tree != root->left) {
+ if(tree->left && tree->right) {
+ if(tree != root) {
+ if(nj_args->expblen) {
+ fprintf(fp, "):%e", tree->dist);
+ } else {
+ fprintf(fp, "):%f", tree->dist);
+ }
+ } else {
+ fprintf(fp, ")");
+ }
+ }
+ } else {
+ fprintf(fp, ")");
+ }
+ }
+
+ return;
+}
+
+
+
+
+
+
+
+/*
+ * NJ_init_r()
+ *
+ * This function computes the r column in our matrix
+ *
+ */
+void
+NJ_init_r(DMAT *dmat) {
+
+ long int i, j, size;
+ long int index;
+ float *r, *r2, *val;
+ long int size1;
+ float size2;
+
+ r = dmat->r;
+ r2 = dmat->r2;
+ val = dmat->val;
+ size = dmat->size;
+ size1 = size-1;
+ size2 = (float)(size-2);
+
+ index = 0;
+ for(i=0;i<size1;i++) {
+ index++;
+ for(j=i+1;j<size;j++) {
+ r[i] += val[index];
+ r[j] += val[index];
+ index++;
+ }
+
+ r2[i] = r[i]/size2;
+ }
+
+ return;
+}
+
+
+
+
+
+
+
+
+
+
+
+/*
+ * NJ_init_vertex() -
+ *
+ * Construct a vertex, which we will use to construct our tree
+ * in a true bottom-up approach. The vertex construct is
+ * basically the center node in the initial star topology.
+ *
+ */
+NJ_VERTEX *
+NJ_init_vertex(DMAT *dmat) {
+
+ long int i;
+ NJ_VERTEX *vertex;
+
+ /* allocate the vertex here */
+ vertex = (NJ_VERTEX *)calloc(1, sizeof(NJ_VERTEX));
+
+ /* allocate the nodes in the vertex */
+ vertex->nodes = (NJ_TREE **)calloc(dmat->ntaxa, sizeof(NJ_TREE *));
+ vertex->nodes_handle = vertex->nodes;
+
+ /* initialize our size and active variables */
+ vertex->nactive = dmat->ntaxa;
+ vertex->size = dmat->ntaxa;
+
+ /* initialize the nodes themselves */
+ for(i=0;i<dmat->ntaxa;i++) {
+
+ vertex->nodes[i] = (NJ_TREE *)calloc(1, sizeof(NJ_TREE));
+
+ vertex->nodes[i]->left = NULL;
+ vertex->nodes[i]->right = NULL;
+
+ vertex->nodes[i]->taxa_index = i;
+ }
+
+ return(vertex);
+}
+
+
+
+
+
+/*
+ * NJ_decompose() -
+ *
+ * This function decomposes the star by creating new internal nodes
+ * and joining two existing tree nodes to it
+ *
+ */
+NJ_TREE *
+NJ_decompose(DMAT *dmat,
+ NJ_VERTEX *vertex,
+ long int x,
+ long int y,
+ int last_flag) {
+
+ NJ_TREE *new_node;
+ float x2clade, y2clade;
+
+ /* compute the distance from the clade components to the new node */
+ if(last_flag) {
+ x2clade =
+ (dmat->val[NJ_MAP(x, y, dmat->size)]);
+ } else {
+ x2clade =
+ (dmat->val[NJ_MAP(x, y, dmat->size)])/2 +
+ ((dmat->r2[x] - dmat->r2[y])/2);
+ }
+
+ vertex->nodes[x]->dist = x2clade;
+
+ if(last_flag) {
+ y2clade =
+ (dmat->val[NJ_MAP(x, y, dmat->size)]);
+ } else {
+ y2clade =
+ (dmat->val[NJ_MAP(x, y, dmat->size)])/2 +
+ ((dmat->r2[y] - dmat->r2[x])/2);
+ }
+
+ vertex->nodes[y]->dist = y2clade;
+
+ /* allocate new node to connect two sub-clades */
+ new_node = (NJ_TREE *)calloc(1, sizeof(NJ_TREE));
+
+ new_node->left = vertex->nodes[x];
+ new_node->right = vertex->nodes[y];
+ new_node->taxa_index = NJ_INTERNAL_NODE; /* this is not a terminal node, no taxa index */
+
+ if(last_flag) {
+ return(new_node);
+ }
+
+ vertex->nodes[x] = new_node;
+ vertex->nodes[y] = vertex->nodes[0];
+
+ vertex->nodes = &(vertex->nodes[1]);
+
+ vertex->nactive--;
+
+ return(new_node);
+}
+
+
+
+/*
+ * NJ_print_vertex() -
+ *
+ * For debugging, print the contents of the vertex
+ *
+ */
+void
+NJ_print_vertex(NJ_VERTEX *vertex) {
+
+ long int i;
+
+ printf("Number of active nodes: %ld\n", vertex->nactive);
+
+ for(i=0;i<vertex->nactive;i++) {
+ printf("%ld ", vertex->nodes[i]->taxa_index);
+ }
+ printf("\n");
+
+ return;
+}
+
+
+
+
+
+
+
+
+
+/*
+ * NJ_print_r() -
+ *
+ */
+void
+NJ_print_r(DMAT *dmat) {
+
+ long int i;
+
+ printf("\n");
+ for(i=0;i<dmat->size;i++) {
+ printf("r[%ld] = %0.2f\n", i, dmat->r[i]);
+ }
+ printf("\n");
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_print_taxanames() -
+ *
+ * Print taxa names here
+ *
+ */
+void
+NJ_print_taxanames(DMAT *dmat) {
+
+ long int i;
+
+ printf("Number of taxa: %ld\n", dmat->ntaxa);
+
+ for(i=0;i<dmat->ntaxa;i++) {
+ printf("%ld) %s\n", i, dmat->taxaname[i]);
+ }
+
+ printf("\n");
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_shuffle_distance_matrix() -
+ *
+ * Randomize a distance matrix here
+ *
+ */
+void
+NJ_shuffle_distance_matrix(DMAT *dmat) {
+
+
+ long int *perm = NULL;
+ char **tmp_taxaname = NULL;
+ float *tmp_val = NULL;
+ long int i, j;
+
+
+ /* alloc the random permutation and a new matrix to hold the shuffled vals */
+ perm = (long int *)calloc(dmat->size, sizeof(long int));
+ tmp_taxaname = (char **)calloc(dmat->size, sizeof(char *));
+ tmp_val = (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float));
+ if(!tmp_taxaname || !perm || !tmp_val) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_shuffle_distance_matrix()\n");
+ exit(-1);
+ }
+
+ /* compute a permutation which will describe how to shuffle the matrix */
+ NJ_permute(perm, dmat->size);
+
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+
+ if(perm[j] < perm[i]) {
+ tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[j], perm[i], dmat->size)];
+ } else {
+ tmp_val[NJ_MAP(i, j, dmat->size)] = dmat->val[NJ_MAP(perm[i], perm[j], dmat->size)];
+ }
+
+ }
+
+ tmp_taxaname[i] = dmat->taxaname[perm[i]];
+ }
+
+ /* free our random permutation */
+ if(perm) {
+ free(perm);
+ }
+
+ /* free the old value matrix */
+ if(dmat->val) {
+ free(dmat->val);
+ }
+
+ /* re-assign the value matrix pointers */
+ dmat->val = tmp_val;
+ dmat->valhandle = dmat->val;
+
+ /*
+ * Free our old taxaname with its particular ordering
+ * and re-assign to the new.
+ */
+ if(dmat->taxaname) {
+ free(dmat->taxaname);
+ }
+ dmat->taxaname = tmp_taxaname;
+
+ return;
+}
+
+
+
+/*
+ * NJ_free_tree() -
+ *
+ * Free a given NJ tree
+ */
+void
+NJ_free_tree(NJ_TREE *node) {
+
+ if(!node) {
+ return;
+ }
+
+ if(node->left) {
+ NJ_free_tree(node->left);
+ }
+
+ if(node->right) {
+ NJ_free_tree(node->right);
+ }
+
+ free(node);
+
+ return;
+}
+
+
+
+
+
+
+
+
+
+/*
+ * NJ_print_permutation()
+ *
+ * Print a permutation
+ *
+ */
+void
+NJ_print_permutation(long int *perm,
+ long int size) {
+
+ long int i;
+
+ for(i=0;i<size-1;i++) {
+ printf("%ld,", perm[i]);
+ }
+ printf("%ld\n", perm[size-1]);
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_dup_dmat() -
+ *
+ * Duplicate a distance matrix
+ *
+ */
+DMAT *
+NJ_dup_dmat(DMAT *src) {
+
+ long int i;
+ DMAT *dest;
+
+ /* allocate the resulting distance matrix */
+ dest = (DMAT *)calloc(1, sizeof(DMAT));
+ if(!dest) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
+ goto XIT_BAD;
+ }
+
+ dest->ntaxa = src->ntaxa;
+ dest->size = src->size;
+
+ /* allocate space for array of pointers to taxanames */
+ dest->taxaname = (char **)calloc(dest->ntaxa, sizeof(char *));
+ if(!dest->taxaname) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
+ goto XIT_BAD;
+ }
+
+ /* allocate space for the taxanames themselves */
+ for(i=0;i<src->ntaxa;i++) {
+ dest->taxaname[i] = (char *)calloc(strlen(src->taxaname[i])+1, sizeof(char));
+ if(!dest->taxaname[i]) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
+ goto XIT_BAD;
+ }
+ }
+
+ /* allocate space for the distance values */
+ dest->val = (float *)calloc(NJ_NCELLS(src->ntaxa), sizeof(float));
+ if(!dest->val) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_dup_dmat()\n");
+ goto XIT_BAD;
+ }
+
+ /* allocate space for the r and r2 vectors */
+ dest->r = (float *)calloc(src->ntaxa, sizeof(float));
+ dest->r2 = (float *)calloc(src->ntaxa, sizeof(float));
+
+ /* copy titles */
+ for(i=0;i<src->ntaxa;i++) {
+ strcpy(dest->taxaname[i], src->taxaname[i]);
+ }
+
+ /* copy values */
+ memcpy(dest->val, src->valhandle, NJ_NCELLS(src->ntaxa)*sizeof(float));
+
+ /* copy r and r2 */
+ memcpy(dest->r, src->rhandle, src->ntaxa*sizeof(float));
+ memcpy(dest->r2, src->r2handle, src->ntaxa*sizeof(float));
+
+ /* track some memory addresses */
+ dest->valhandle = dest->val;
+ dest->rhandle = dest->r;
+ dest->r2handle = dest->r2;
+
+ return(dest);
+
+ XIT_BAD:
+
+ /* free what we may have allocated */
+ NJ_free_dmat(dest);
+
+ return(NULL);
+}
+
+
+
+
+/*
+ * NJ_free_dmat() -
+ */
+void
+NJ_free_dmat(DMAT *dmat) {
+
+ long int i;
+
+ if(dmat) {
+
+ if(dmat->taxaname) {
+
+ for(i=0;i<dmat->ntaxa;i++) {
+ if(dmat->taxaname[i]) {
+ free(dmat->taxaname[i]);
+ }
+ }
+
+ free(dmat->taxaname);
+ }
+
+ if(dmat->valhandle) {
+ free(dmat->valhandle);
+ }
+
+ if(dmat->rhandle) {
+ free(dmat->rhandle);
+ }
+
+ if(dmat->r2handle) {
+ free(dmat->r2handle);
+ }
+
+ free(dmat);
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_free_vertex() -
+ *
+ * Free the vertex data structure
+ *
+ */
+void
+NJ_free_vertex(NJ_VERTEX *vertex) {
+
+ if(vertex) {
+ if(vertex->nodes_handle) {
+ free(vertex->nodes_handle);
+ }
+ free(vertex);
+ }
+
+ return;
+}
+
+
+
+
+
+
+
+
+
+/*
+ *
+ * NJ_min_transform() - Find the smallest transformed value to identify
+ * which nodes to join.
+ *
+ * INPUTS:
+ * -------
+ * dmat -- The distance matrix
+ *
+ * RETURNS:
+ * --------
+ * <float> -- The minimimum transformed distance
+ * ret_i -- The row of the smallest transformed distance (by reference)
+ * ret_j -- The col of the smallest transformed distance (by reference)
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * Used only with traditional Neighbor-Joining, this function checks the entire
+ * working distance matrix and identifies the smallest transformed distance.
+ * This requires traversing the entire diagonal matrix, which is itself a
+ * O(N^2) operation.
+ *
+ */
+float
+NJ_min_transform(DMAT *dmat,
+ long int *ret_i,
+ long int *ret_j) {
+
+ long int i, j; /* indices used for looping */
+ long int tmp_i = 0;/* to limit pointer dereferencing */
+ long int tmp_j = 0;/* to limit pointer dereferencing */
+ float smallest; /* track the smallest trans. dist */
+ float curval; /* the current trans. dist in loop */
+
+ float *ptr; /* pointer into distance matrix */
+ float *r2; /* pointer to r2 matrix for computing transformed dists */
+
+ smallest = (float)HUGE_VAL;
+
+ /* track these here to limit pointer dereferencing in inner loop */
+ ptr = dmat->val;
+ r2 = dmat->r2;
+
+ /* for every row */
+ for(i=0;i<dmat->size;i++) {
+ ptr++; /* skip diagonal */
+ for(j=i+1;j<dmat->size;j++) { /* for every column */
+
+ /* find transformed distance in matrix at i, j */
+ curval = *(ptr++) - (r2[i] + r2[j]);
+
+ /* if the transformed distanance is less than the known minimum */
+ if(curval < smallest) {
+
+ smallest = curval;
+ tmp_i = i;
+ tmp_j = j;
+ }
+ }
+ }
+
+ /* pass back (by reference) the coords of the min. transformed distance */
+ *ret_i = tmp_i;
+ *ret_j = tmp_j;
+
+ return(smallest); /* return the min transformed distance */
+}
+
+
+
+
+
+
+
+
+
+
/*
* clearcut.h
*
#ifndef _INC_CLEARCUT_H_
#define _INC_CLEARCUT_H_ 1
+extern "C" {
+
#include "common.h"
#include "cmdargs.h"
-
#define NJ_VERSION "1.0.9"
} NJ_VERTEX;
-
-
-
-
/* some function prototypes */
+int clearcut_main(int, char**);
/* core function for performing Relaxed Neighbor Joining */
NJ_TREE *
long int *ret_i,
long int *ret_j);
+}
+
#endif /* _INC_CLEARCUT_H_ */
+
*/
#include "clearcutcommand.h"
+#include "clearcut.h"
+
/**************************************************************************************/
ClearcutCommand::ClearcutCommand(string option) {
//prepare filename
string outputName = outputDir + m->getRootName(m->getSimpleName(inputFile)) + "tre";
- //get location of clearcut
- GlobalData* globaldata = GlobalData::getInstance();
- string path = globaldata->argv;
- path = path.substr(0, (path.find_last_of('m')));
+ vector<char*> cPara;
- string clearcutCommand = path + "clearcut ";
+ char* tempClearcut = new char[8]; strcpy(tempClearcut, "clearcut"); cPara.push_back(tempClearcut);
//you gave us a distance matrix
- if (phylipfile != "") { clearcutCommand += "--distance "; }
+ if (phylipfile != "") { char* temp = new char[10]; strcpy(temp, "--distance"); cPara.push_back(temp); }
//you gave us a fastafile
- if (fastafile != "") { clearcutCommand += "--alignment "; }
+ if (fastafile != "") { char* temp = new char[11]; strcpy(temp, "--alignment"); cPara.push_back(temp); }
- if (version) { clearcutCommand += "--version "; }
- if (verbose) { clearcutCommand += "--verbose "; }
- if (quiet) { clearcutCommand += "--quiet "; }
- if (seed != "*") { clearcutCommand += "--seed=" + seed + " "; }
- if (norandom) { clearcutCommand += "--norandom "; }
- if (shuffle) { clearcutCommand += "--shuffle "; }
- if (neighbor) { clearcutCommand += "--neighbor "; }
+ if (version) { char* temp = new char[9]; strcpy(temp, "--version"); cPara.push_back(temp); }
+ if (verbose) { char* temp = new char[9]; strcpy(temp, "--verbose"); cPara.push_back(temp); }
+ if (quiet) { char* temp = new char[7]; strcpy(temp, "--quiet"); cPara.push_back(temp); }
+ if (seed != "*") {
+ string tempSeed = "--seed=" + seed;
+ char* temp = new char[tempSeed.length()];
+ strcpy(temp, tempSeed.c_str());
+ cPara.push_back(temp);
+ }
+ if (norandom) { char* temp = new char[10]; strcpy(temp, "--norandom"); cPara.push_back(temp); }
+ if (shuffle) { char* temp = new char[9]; strcpy(temp, "--shuffle"); cPara.push_back(temp); }
+ if (neighbor) { char* temp = new char[10]; strcpy(temp, "--neighbor"); cPara.push_back(temp); }
+ string tempIn = "";
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- clearcutCommand += "--in=" + inputFile + " ";
+ tempIn += "--in=" + inputFile;
#else
- clearcutCommand += "--in=\"" + inputFile + "\" ";
+ tempIn += "--in=\"" + inputFile + "\"";
#endif
- if (stdoutWanted) { clearcutCommand += "--stdout "; }
+ char* tempI = new char[tempIn.length()];
+ strcpy(tempI, tempIn.c_str());
+ cPara.push_back(tempI);
+
+ if (stdoutWanted) { char* temp = new char[8]; strcpy(temp, "--stdout"); cPara.push_back(temp); }
else{
+ string tempOut = "";
#if defined (__APPLE__) || (__MACH__) || (linux) || (__linux)
- clearcutCommand += "--out=" + outputName + " "; }
+ tempOut += "--out=" + outputName;
#else
- clearcutCommand += "--out=\"" + outputName + "\" "; }
+ tempOut += "--out=\"" + outputName + "\"";
#endif
+
+ char* temp = new char[tempOut.length()];
+ strcpy(temp, tempOut.c_str());
+ cPara.push_back(temp);
+ }
+
+ if (DNA) { char* temp = new char[5]; strcpy(temp, "--DNA"); cPara.push_back(temp); }
+ if (protein) { char* temp = new char[9]; strcpy(temp, "--protein"); cPara.push_back(temp); }
+ if (jukes) { char* temp = new char[7]; strcpy(temp, "--jukes"); cPara.push_back(temp); }
+ if (kimura) { char* temp = new char[8]; strcpy(temp, "--kimura"); cPara.push_back(temp); }
+ if (matrixout != "") {
+ string tempMatrix = "--matrixout=" + matrixout;
+ char* temp = new char[tempMatrix.length()];
+ strcpy(temp, tempMatrix.c_str());
+ cPara.push_back(temp);
+ }
+
+ if (ntrees != "1") {
+ string tempNtrees = "--ntrees=" + ntrees;
+ char* temp = new char[tempNtrees.length()];
+ strcpy(temp, tempNtrees.c_str());
+ cPara.push_back(temp);
+ }
+
+ if (expblen) { char* temp = new char[9]; strcpy(temp, "--expblen"); cPara.push_back(temp); }
+ if (expdist) { char* temp = new char[9]; strcpy(temp, "--expdist"); cPara.push_back(temp); }
+
+ char** clearcutParameters;
+ clearcutParameters = new char*[cPara.size()];
+ for (int i = 0; i < cPara.size(); i++) { clearcutParameters[i] = cPara[i]; }
+ int numArgs = cPara.size();
- if (DNA) { clearcutCommand += "--DNA "; }
- if (protein) { clearcutCommand += "--protein "; }
- if (jukes) { clearcutCommand += "--jukes "; }
- if (kimura) { clearcutCommand += "--kimura "; }
- if (matrixout != "") { clearcutCommand += "--matrixout=" + matrixout + " "; }
- if (ntrees != "1") { clearcutCommand += "--ntrees=" + ntrees + " "; }
- if (expblen) { clearcutCommand += "--expblen "; }
- if (expdist) { clearcutCommand += "--expdist "; }
-
- //run clearcut
- system(clearcutCommand.c_str());
+ clearcut_main(numArgs, clearcutParameters);
if (!stdoutWanted) {
m->mothurOutEndLine();
+++ /dev/null
-/*
- * cmdargs.c
- *
- * $Id$
- *
- *****************************************************************************
- *
- * Copyright (c) 2004, Luke Sheneman
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * + Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * + Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * + The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************
- *
- * AUTHOR:
- *
- * Luke Sheneman
- * sheneman@cs.uidaho.edu
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-
-#ifdef USE_GNU
-#include <getopt.h>
-#else
-#include "getopt_long.h"
-#endif /* USE_GNU*/
-
-
-#include "clearcut.h"
-#include "cmdargs.h"
-
-
-/*
- * NJ_handle_args() -
- *
- */
-NJ_ARGS *
-NJ_handle_args(int argc,
- char *argv[]) {
-
- static NJ_ARGS nj_args;
- int option_index, c;
-
- struct option NJ_long_options[] = {
-
- /* These options don't set a flag */
- {"in", required_argument, NULL, 'i'},
- {"out", required_argument, NULL, 'o'},
- {"seed", required_argument, NULL, 's'},
- {"matrixout", required_argument, NULL, 'm'},
- {"ntrees", required_argument, NULL, 'n'},
-
- /* These options set a flag */
- {"verbose", no_argument, &(nj_args.verbose_flag), 1},
- {"quiet", no_argument, &(nj_args.quiet_flag), 1},
- {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE},
- {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES},
- {"help", no_argument, &(nj_args.help), 1},
- {"version", no_argument, &(nj_args.version), 1},
- {"norandom", no_argument, &(nj_args.norandom), 1},
- {"shuffle", no_argument, &(nj_args.shuffle), 1},
- {"stdin", no_argument, &(nj_args.stdin_flag), 1},
- {"stdout", no_argument, &(nj_args.stdout_flag), 1},
- {"dna", no_argument, &(nj_args.dna_flag), 1},
- {"DNA", no_argument, &(nj_args.dna_flag), 1},
- {"protein", no_argument, &(nj_args.protein_flag), 1},
- {"neighbor", no_argument, &(nj_args.neighbor), 1},
- {"expblen", no_argument, &(nj_args.expblen), 1},
- {"expdist", no_argument, &(nj_args.expdist), 1},
-
- {"jukes", no_argument, &(nj_args.jukes_flag), 1},
- {"kimura", no_argument, &(nj_args.kimura_flag), 1},
-
- {0, 0, 0, 0}
-
- };
-
- /* initializes options to their default */
- nj_args.infilename = NULL;
- nj_args.outfilename = NULL;
- nj_args.matrixout = NULL;
- nj_args.seed = time(0);
- nj_args.verbose_flag = 0;
- nj_args.quiet_flag = 0;
- nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
- nj_args.help = 0;
- nj_args.version = 0;
- nj_args.norandom = 0;
- nj_args.shuffle = 0;
- nj_args.stdin_flag = 0;
- nj_args.stdout_flag = 0;
- nj_args.dna_flag = 0;
- nj_args.protein_flag = 0;
- nj_args.correction_model = NJ_MODEL_NONE;
- nj_args.jukes_flag = 0;
- nj_args.kimura_flag = 0;
- nj_args.neighbor = 0;
- nj_args.ntrees = 1;
- nj_args.expblen = 0;
- nj_args.expdist = 0;
-
- while(1) {
- c = getopt_long(argc,
- argv,
- "i:o:s:m:n:vqduahVSIOrDPjkNeE",
- NJ_long_options,
- &option_index);
-
- if(c == -1) {
- break;
- }
-
- switch(c) {
-
- case 0:
- if(NJ_long_options[option_index].flag) {
- break;
- }
-
- printf("option %s", NJ_long_options[option_index].name);
- if(optarg) {
- printf(" with arg %s", optarg);
- }
- printf("\n");
- break;
-
- case 'i':
- nj_args.infilename = optarg;
- break;
-
- case 'o':
- nj_args.outfilename = optarg;
- break;
-
- case 's':
- nj_args.seed = atoi(optarg);
- break;
-
- case 'm':
- nj_args.matrixout = optarg;
- break;
-
- case 'n':
- nj_args.ntrees = atoi(optarg);
- break;
-
- case 'v':
- nj_args.verbose_flag = 1;
- break;
-
- case 'q':
- nj_args.quiet_flag = 1;
- break;
-
- case 'd':
- nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
- break;
-
- case 'a':
- nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES;
- break;
-
- case 'h':
- nj_args.help = 1;
- break;
-
- case 'V':
- nj_args.version = 1;
- break;
-
- case 'S':
- nj_args.shuffle = 1;
- break;
-
- case 'I':
- nj_args.stdin_flag = 1;
- break;
-
- case 'O':
- nj_args.stdin_flag = 1;
- break;
-
- case 'r':
- nj_args.norandom = 1;
- break;
-
- case 'D':
- nj_args.dna_flag = 1;
- break;
-
- case 'P':
- nj_args.protein_flag = 1;
- break;
-
- case 'j':
- nj_args.jukes_flag = 1;
- break;
-
- case 'k':
- nj_args.kimura_flag = 1;
- break;
-
- case 'N':
- nj_args.neighbor = 1;
- break;
-
- case 'e':
- nj_args.expblen = 1;
- break;
-
- case 'E':
- nj_args.expdist = 1;
- break;
-
- default:
- NJ_usage();
- exit(-1);
- }
- }
-
- if(optind < argc) {
- fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]);
- NJ_usage();
- exit(-1);
- }
-
- if(nj_args.version) {
- printf("Clearcut Version: %s\n", NJ_VERSION);
- exit(0);
- }
-
- if(nj_args.help) {
- NJ_usage();
- exit(0);
- }
-
- /* if stdin & explicit filename are specified for input */
- if(nj_args.stdin_flag) {
- if(nj_args.infilename) {
- fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* if stdout & explicit filename are specified for output */
- if(nj_args.stdout_flag) {
- if(nj_args.outfilename) {
- fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* if user did not specify stdin or filename, default to stdin */
- if(!nj_args.stdin_flag) {
- if(!nj_args.infilename) {
-
- fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n");
- nj_args.stdin_flag = 1;
- }
- }
-
- /* if user did not specify stdout or filename, default to stdout */
- if(!nj_args.stdout_flag) {
- if(!nj_args.outfilename) {
-
- fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n");
- nj_args.stdout_flag = 1;
- }
- }
-
- /* User must specify distance matrix or alignment */
- if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) {
- fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n");
- NJ_usage();
- exit(-1);
- }
-
- /* do not allow protein or DNA options for distance matrix input */
- if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
- if(nj_args.dna_flag || nj_args.protein_flag) {
- fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* make sure different filenames were specified for input and output */
- if(!nj_args.stdin_flag && !nj_args.stdout_flag) {
-
- if(!strcmp(nj_args.infilename, nj_args.outfilename)) {
- fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* make sure that user specifies DNA or Protein if dealing with alignment input */
- if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
- if(!nj_args.dna_flag && !nj_args.protein_flag) {
- fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* make sure that user does not specify both protein and DNA when dealing with alignment input */
- if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
- if(nj_args.dna_flag && nj_args.protein_flag) {
- fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n");
- NJ_usage();
- exit(-1);
- }
- }
-
- /* make sure verbose and quiet were not specified together */
- if(nj_args.verbose_flag && nj_args.quiet_flag) {
- fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n");
- NJ_usage();
- exit(-1);
- }
-
- /* make sure that a correction model was specified only when providing an alignment */
- if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
- if(nj_args.jukes_flag || nj_args.kimura_flag) {
- fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n");
- NJ_usage();
- exit(-1);
- }
- } else {
- if(nj_args.jukes_flag && nj_args.kimura_flag) {
- fprintf(stderr, "Clearcut: Only specify one correction model\n");
- NJ_usage();
- exit(-1);
- } else {
- if(nj_args.jukes_flag && !nj_args.kimura_flag) {
- nj_args.correction_model = NJ_MODEL_JUKES;
- } else if(nj_args.kimura_flag && !nj_args.jukes_flag) {
- nj_args.correction_model = NJ_MODEL_KIMURA;
- } else {
- nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */
- }
- }
- }
-
- /* make sure that the number of output trees is reasonable */
- if(nj_args.ntrees <= 0) {
- fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n");
- NJ_usage();
- exit(-1);
- }
-
- /*
- * make sure that if exponential distances are specified,
- * we are dealing with alignment input
- */
- if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
- fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n");
- NJ_usage();
- exit(-1);
- }
-
- return(&nj_args);
-}
-
-
-
-
-
-/*
- * NJ_print_args() -
- *
- */
-void
-NJ_print_args(NJ_ARGS *nj_args) {
-
- char input_mode[32];
-
- switch (nj_args->input_mode) {
- case NJ_INPUT_MODE_DISTANCE:
- sprintf(input_mode, "Distance Matrix");
- break;
- case NJ_INPUT_MODE_UNALIGNED_SEQUENCES:
- sprintf(input_mode, "Unaligned Sequences");
- break;
- case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
- sprintf(input_mode, "Aligned Sequences");
- break;
- default:
- sprintf(input_mode, "UNKNOWN");
- break;
- }
-
- printf("\n*** Command Line Arguments ***\n");
-
- printf("Input Mode: %s\n", input_mode);
-
- if(nj_args->stdin_flag) {
- printf("Input from STDIN\n");
- } else {
- printf("Input File: %s\n", nj_args->infilename);
- }
-
- if(nj_args->stdout_flag) {
- printf("Output from STDOUT\n");
- } else {
- printf("Output File: %s\n", nj_args->outfilename);
- }
-
- if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) {
- if(nj_args->aligned_flag) {
- printf("Input Sequences Aligned: YES\n");
- } else {
- printf("Input Sequences Aligned: NO\n");
- }
- }
-
- if(nj_args->verbose_flag) {
- printf("Verbose Mode: ON\n");
- } else {
- printf("Verbose Mode: OFF\n");
- }
-
- if(nj_args->quiet_flag) {
- printf("Quiet Mode: ON\n");
- } else {
- printf("Quiet Mode: OFF\n");
- }
-
- if(nj_args->seed) {
- printf("Random Seed: %d\n", nj_args->seed);
- }
-
- printf("\n*******\n");
-
- return;
-}
-
-
-
-
-/*
- * NJ_usage() -
- *
- * Print a usage message
- *
- */
-void
-NJ_usage(void) {
-
- printf("Usage: clearcut --in=<infilename> --out=<outfilename> [options]...\n");
- printf("GENERAL OPTIONS:\n");
- printf(" -h, --help Display this information.\n");
- printf(" -V, --version Print the version of this program.\n");
- printf(" -v, --verbose More output. (Default: OFF)\n");
- printf(" -q, --quiet Silent operation. (Default: ON)\n");
- printf(" -s, --seed=<seed> Explicitly set the PRNG seed to a specific value.\n");
- printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n");
- printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n");
- printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n");
-
- printf("\n");
- printf("INPUT OPTIONS:\n");
- printf(" -I, --stdin Read input from STDIN.\n");
- printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n");
- printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n");
- printf(" -D, --DNA Input alignment are DNA sequences.\n");
- printf(" -P, --protein Input alignment are protein sequences.\n");
-
- printf("\n");
- printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n");
- printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n");
- printf(" -k, --kimura Use Kimura correction for distance matrix.\n");
-
- printf("\n");
- printf("OUTPUT OPTIONS:\n");
- printf(" -O, --stdout Output tree to STDOUT.\n");
- printf(" -m, --matrixout=<file> Output distance matrix to specified file.\n");
- printf(" -n, --ntrees=<n> Output n trees. (Default: 1)\n");
- printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n");
- printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n");
-
- printf("\n");
- printf("EXAMPLES:\n");
- printf(" Compute tree by supplying distance matrix via stdin:\n");
- printf(" clearcut --distance < distances.txt > treefile.tre\n");
- printf("\n");
- printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n");
- printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n");
-
- return;
-}
-
-
-
--- /dev/null
+/*
+ * cmdargs.c
+ *
+ * $Id$
+ *
+ *****************************************************************************
+ *
+ * Copyright (c) 2004, Luke Sheneman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + The names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************
+ *
+ * AUTHOR:
+ *
+ * Luke Sheneman
+ * sheneman@cs.uidaho.edu
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+
+#ifdef USE_GNU
+#include <getopt.h>
+#else
+#include "getopt_long.h"
+#endif /* USE_GNU*/
+
+
+#include "clearcut.h"
+#include "cmdargs.h"
+
+
+/*
+ * NJ_handle_args() -
+ *
+ */
+NJ_ARGS *
+NJ_handle_args(int argc,
+ char *argv[]) {
+
+ static NJ_ARGS nj_args;
+ int option_index, c;
+
+ struct option NJ_long_options[] = {
+
+ /* These options don't set a flag */
+ {"in", required_argument, NULL, 'i'},
+ {"out", required_argument, NULL, 'o'},
+ {"seed", required_argument, NULL, 's'},
+ {"matrixout", required_argument, NULL, 'm'},
+ {"ntrees", required_argument, NULL, 'n'},
+
+ /* These options set a flag */
+ {"verbose", no_argument, &(nj_args.verbose_flag), 1},
+ {"quiet", no_argument, &(nj_args.quiet_flag), 1},
+ {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE},
+ {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES},
+ {"help", no_argument, &(nj_args.help), 1},
+ {"version", no_argument, &(nj_args.version), 1},
+ {"norandom", no_argument, &(nj_args.norandom), 1},
+ {"shuffle", no_argument, &(nj_args.shuffle), 1},
+ {"stdin", no_argument, &(nj_args.stdin_flag), 1},
+ {"stdout", no_argument, &(nj_args.stdout_flag), 1},
+ {"dna", no_argument, &(nj_args.dna_flag), 1},
+ {"DNA", no_argument, &(nj_args.dna_flag), 1},
+ {"protein", no_argument, &(nj_args.protein_flag), 1},
+ {"neighbor", no_argument, &(nj_args.neighbor), 1},
+ {"expblen", no_argument, &(nj_args.expblen), 1},
+ {"expdist", no_argument, &(nj_args.expdist), 1},
+
+ {"jukes", no_argument, &(nj_args.jukes_flag), 1},
+ {"kimura", no_argument, &(nj_args.kimura_flag), 1},
+
+ {0, 0, 0, 0}
+
+ };
+
+ /* initializes options to their default */
+ nj_args.infilename = NULL;
+ nj_args.outfilename = NULL;
+ nj_args.matrixout = NULL;
+ nj_args.seed = time(0);
+ nj_args.verbose_flag = 0;
+ nj_args.quiet_flag = 0;
+ nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
+ nj_args.help = 0;
+ nj_args.version = 0;
+ nj_args.norandom = 0;
+ nj_args.shuffle = 0;
+ nj_args.stdin_flag = 0;
+ nj_args.stdout_flag = 0;
+ nj_args.dna_flag = 0;
+ nj_args.protein_flag = 0;
+ nj_args.correction_model = NJ_MODEL_NONE;
+ nj_args.jukes_flag = 0;
+ nj_args.kimura_flag = 0;
+ nj_args.neighbor = 0;
+ nj_args.ntrees = 1;
+ nj_args.expblen = 0;
+ nj_args.expdist = 0;
+
+ while(1) {
+
+ c = getopt_long(argc,
+ argv,
+ "i:o:s:m:n:vqduahVSIOrDPjkNeE",
+ NJ_long_options,
+ &option_index);
+ if(c == -1) {
+ break;
+ }
+
+ switch(c) {
+
+ case 0:
+ if(NJ_long_options[option_index].flag) {
+ break;
+ }
+
+ printf("option %s", NJ_long_options[option_index].name);
+ if(optarg) {
+ printf(" with arg %s", optarg);
+ }
+ printf("\n");
+ break;
+
+ case 'i':
+ nj_args.infilename = optarg;
+ break;
+
+ case 'o':
+ nj_args.outfilename = optarg;
+ break;
+
+ case 's':
+ nj_args.seed = atoi(optarg);
+ break;
+
+ case 'm':
+ nj_args.matrixout = optarg;
+ break;
+
+ case 'n':
+ nj_args.ntrees = atoi(optarg);
+ break;
+
+ case 'v':
+ nj_args.verbose_flag = 1;
+ break;
+
+ case 'q':
+ nj_args.quiet_flag = 1;
+ break;
+
+ case 'd':
+ nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
+ break;
+
+ case 'a':
+ nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES;
+ break;
+
+ case 'h':
+ nj_args.help = 1;
+ break;
+
+ case 'V':
+ nj_args.version = 1;
+ break;
+
+ case 'S':
+ nj_args.shuffle = 1;
+ break;
+
+ case 'I':
+ nj_args.stdin_flag = 1;
+ break;
+
+ case 'O':
+ nj_args.stdin_flag = 1;
+ break;
+
+ case 'r':
+ nj_args.norandom = 1;
+ break;
+
+ case 'D':
+ nj_args.dna_flag = 1;
+ break;
+
+ case 'P':
+ nj_args.protein_flag = 1;
+ break;
+
+ case 'j':
+ nj_args.jukes_flag = 1;
+ break;
+
+ case 'k':
+ nj_args.kimura_flag = 1;
+ break;
+
+ case 'N':
+ nj_args.neighbor = 1;
+ break;
+
+ case 'e':
+ nj_args.expblen = 1;
+ break;
+
+ case 'E':
+ nj_args.expdist = 1;
+ break;
+
+ default:
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ if(optind < argc) {
+ fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]);
+ NJ_usage();
+ exit(-1);
+ }
+
+ if(nj_args.version) {
+ printf("Clearcut Version: %s\n", NJ_VERSION);
+ exit(0);
+ }
+
+ if(nj_args.help) {
+ NJ_usage();
+ exit(0);
+ }
+
+ /* if stdin & explicit filename are specified for input */
+ if(nj_args.stdin_flag) {
+ if(nj_args.infilename) {
+ fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* if stdout & explicit filename are specified for output */
+ if(nj_args.stdout_flag) {
+ if(nj_args.outfilename) {
+ fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* if user did not specify stdin or filename, default to stdin */
+ if(!nj_args.stdin_flag) {
+ if(!nj_args.infilename) {
+
+ fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n");
+ nj_args.stdin_flag = 1;
+ }
+ }
+
+ /* if user did not specify stdout or filename, default to stdout */
+ if(!nj_args.stdout_flag) {
+ if(!nj_args.outfilename) {
+
+ fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n");
+ nj_args.stdout_flag = 1;
+ }
+ }
+
+ /* User must specify distance matrix or alignment */
+ if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) {
+ fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n");
+ NJ_usage();
+ exit(-1);
+ }
+
+ /* do not allow protein or DNA options for distance matrix input */
+ if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
+ if(nj_args.dna_flag || nj_args.protein_flag) {
+ fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* make sure different filenames were specified for input and output */
+ if(!nj_args.stdin_flag && !nj_args.stdout_flag) {
+
+ if(!strcmp(nj_args.infilename, nj_args.outfilename)) {
+ fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* make sure that user specifies DNA or Protein if dealing with alignment input */
+ if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
+ if(!nj_args.dna_flag && !nj_args.protein_flag) {
+ fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* make sure that user does not specify both protein and DNA when dealing with alignment input */
+ if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
+ if(nj_args.dna_flag && nj_args.protein_flag) {
+ fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n");
+ NJ_usage();
+ exit(-1);
+ }
+ }
+
+ /* make sure verbose and quiet were not specified together */
+ if(nj_args.verbose_flag && nj_args.quiet_flag) {
+ fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n");
+ NJ_usage();
+ exit(-1);
+ }
+
+ /* make sure that a correction model was specified only when providing an alignment */
+ if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
+ if(nj_args.jukes_flag || nj_args.kimura_flag) {
+ fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n");
+ NJ_usage();
+ exit(-1);
+ }
+ } else {
+ if(nj_args.jukes_flag && nj_args.kimura_flag) {
+ fprintf(stderr, "Clearcut: Only specify one correction model\n");
+ NJ_usage();
+ exit(-1);
+ } else {
+ if(nj_args.jukes_flag && !nj_args.kimura_flag) {
+ nj_args.correction_model = NJ_MODEL_JUKES;
+ } else if(nj_args.kimura_flag && !nj_args.jukes_flag) {
+ nj_args.correction_model = NJ_MODEL_KIMURA;
+ } else {
+ nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */
+ }
+ }
+ }
+
+ /* make sure that the number of output trees is reasonable */
+ if(nj_args.ntrees <= 0) {
+ fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n");
+ NJ_usage();
+ exit(-1);
+ }
+
+ /*
+ * make sure that if exponential distances are specified,
+ * we are dealing with alignment input
+ */
+ if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
+ fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n");
+ NJ_usage();
+ exit(-1);
+ }
+
+ return(&nj_args);
+}
+
+
+
+
+
+/*
+ * NJ_print_args() -
+ *
+ */
+void
+NJ_print_args(NJ_ARGS *nj_args) {
+
+ char input_mode[32];
+
+ switch (nj_args->input_mode) {
+ case NJ_INPUT_MODE_DISTANCE:
+ sprintf(input_mode, "Distance Matrix");
+ break;
+ case NJ_INPUT_MODE_UNALIGNED_SEQUENCES:
+ sprintf(input_mode, "Unaligned Sequences");
+ break;
+ case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
+ sprintf(input_mode, "Aligned Sequences");
+ break;
+ default:
+ sprintf(input_mode, "UNKNOWN");
+ break;
+ }
+
+ printf("\n*** Command Line Arguments ***\n");
+
+ printf("Input Mode: %s\n", input_mode);
+
+ if(nj_args->stdin_flag) {
+ printf("Input from STDIN\n");
+ } else {
+ printf("Input File: %s\n", nj_args->infilename);
+ }
+
+ if(nj_args->stdout_flag) {
+ printf("Output from STDOUT\n");
+ } else {
+ printf("Output File: %s\n", nj_args->outfilename);
+ }
+
+ if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) {
+ if(nj_args->aligned_flag) {
+ printf("Input Sequences Aligned: YES\n");
+ } else {
+ printf("Input Sequences Aligned: NO\n");
+ }
+ }
+
+ if(nj_args->verbose_flag) {
+ printf("Verbose Mode: ON\n");
+ } else {
+ printf("Verbose Mode: OFF\n");
+ }
+
+ if(nj_args->quiet_flag) {
+ printf("Quiet Mode: ON\n");
+ } else {
+ printf("Quiet Mode: OFF\n");
+ }
+
+ if(nj_args->seed) {
+ printf("Random Seed: %d\n", nj_args->seed);
+ }
+
+ printf("\n*******\n");
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_usage() -
+ *
+ * Print a usage message
+ *
+ */
+void
+NJ_usage(void) {
+
+ printf("Usage: clearcut --in=<infilename> --out=<outfilename> [options]...\n");
+ printf("GENERAL OPTIONS:\n");
+ printf(" -h, --help Display this information.\n");
+ printf(" -V, --version Print the version of this program.\n");
+ printf(" -v, --verbose More output. (Default: OFF)\n");
+ printf(" -q, --quiet Silent operation. (Default: ON)\n");
+ printf(" -s, --seed=<seed> Explicitly set the PRNG seed to a specific value.\n");
+ printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n");
+ printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n");
+ printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n");
+
+ printf("\n");
+ printf("INPUT OPTIONS:\n");
+ printf(" -I, --stdin Read input from STDIN.\n");
+ printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n");
+ printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n");
+ printf(" -D, --DNA Input alignment are DNA sequences.\n");
+ printf(" -P, --protein Input alignment are protein sequences.\n");
+
+ printf("\n");
+ printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n");
+ printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n");
+ printf(" -k, --kimura Use Kimura correction for distance matrix.\n");
+
+ printf("\n");
+ printf("OUTPUT OPTIONS:\n");
+ printf(" -O, --stdout Output tree to STDOUT.\n");
+ printf(" -m, --matrixout=<file> Output distance matrix to specified file.\n");
+ printf(" -n, --ntrees=<n> Output n trees. (Default: 1)\n");
+ printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n");
+ printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n");
+
+ printf("\n");
+ printf("EXAMPLES:\n");
+ printf(" Compute tree by supplying distance matrix via stdin:\n");
+ printf(" clearcut --distance < distances.txt > treefile.tre\n");
+ printf("\n");
+ printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n");
+ printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n");
+
+ return;
+}
+
+
+
+++ /dev/null
-/*
- * dist.c
- *
- * $Id$
- *
- *
- *****************************************************************************
- *
- * Copyright (c) 2004, Luke Sheneman
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * + Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * + Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * + The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************
- *
- * Compute a distance matrix given a set of sequences
- *
- *****************************************************************************
- *
- * AUTHOR:
- *
- * Luke Sheneman
- * sheneman@cs.uidaho.edu
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "common.h"
-#include "dayhoff.h"
-#include "fasta.h"
-#include "dist.h"
-
-
-
-
-/*
- * NJ_build_distance_matrix() -
- *
- * Given a filename for an alignment, read the alignment
- * into memory and then compute the distance matrix
- * using the appropriate correction model
- */
-DMAT *
-NJ_build_distance_matrix(NJ_ARGS *nj_args) {
-
- DMAT *dmat;
- NJ_alignment *alignment;
-
- /* Read an alignment in FASTA format */
- alignment =
- NJ_read_fasta(nj_args);
-
- if(!alignment) {
- return(NULL);
- }
-
- /*
- * Given a global multiple sequence alignment (MSA) and
- * a specified distance correction model, compute a
- * corrected distance matrix
- *
- * From proteins, we may want to allow users to specify
- * a substitution matrix (feature)
- */
- dmat =
- NJ_compute_dmat(nj_args,
- alignment);
-
- // NJ_print_taxanames(dmat);
-
- if(!dmat) {
- fprintf(stderr, "Clearcut: Error computing distance matrix\n");
- }
-
- /* now free the memory associated with the alignment */
- NJ_free_alignment(alignment);
-
- return(dmat);
-}
-
-
-
-
-
-/*
- * NJ_compute_dmat() -
- *
- * Given an alignment and a correction model, compute the
- * distance matrix and return it
- *
- */
-DMAT *
-NJ_compute_dmat(NJ_ARGS *nj_args,
- NJ_alignment *alignment) {
-
- DMAT *dmat;
- long int i;
-
-
- /* allocate distance matrix here */
- dmat = (DMAT *)calloc(1, sizeof(DMAT));
- if(!dmat) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
- return(NULL);
- }
-
- dmat->ntaxa = alignment->nseq;
- dmat->size = alignment->nseq;
-
- /* allocate memory to hold the taxa names */
- dmat->taxaname = (char **)calloc(alignment->nseq, sizeof(char *));
- if(!dmat->taxaname) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
- return(NULL);
- }
-
- /* copy sequence titles */
- for(i=0;i<alignment->nseq;i++) {
- dmat->taxaname[i] = (char *)calloc(strlen(alignment->titles[i])+1, sizeof(char));
- if(!dmat->taxaname[i]) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
- return(NULL);
- }
-
- strncpy(dmat->taxaname[i], alignment->titles[i], strlen(alignment->titles[i]));
- }
-
- /* allocate val matrix in dmat */
- dmat->val = (float *)calloc(dmat->ntaxa*dmat->ntaxa, sizeof(float));
- if(!dmat->val) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
- return(NULL);
- }
-
- /* now lets allocate space for the r and r2 columns */
- dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float));
- dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float));
-
- /* track some memory addresses */
- dmat->rhandle = dmat->r;
- dmat->r2handle = dmat->r2;
- dmat->valhandle = dmat->val;
-
- /* apply model correction to matrix */
- switch(nj_args->correction_model) {
-
- case NJ_MODEL_JUKES:
-
- if(nj_args->dna_flag) {
- NJ_DNA_jc_correction(dmat, alignment);
- } else if(nj_args->protein_flag) {
- NJ_PROTEIN_jc_correction(dmat, alignment);
- } else {
- fprintf(stderr, "Clearcut: Need to know sequence type for Jukes-Cantor model correction.\n");
- return(NULL);
- }
-
- break;
-
- case NJ_MODEL_KIMURA:
-
- if(nj_args->dna_flag) {
- NJ_DNA_k2p_correction(dmat, alignment);
- } else if(nj_args->protein_flag) {
- NJ_PROTEIN_kimura_correction(dmat, alignment);
- } else {
- fprintf(stderr, "Clearcut: Need to know sequence type for Kimura model correction.\n");
- return(NULL);
- }
-
- break;
-
- case NJ_MODEL_NONE:
-
- NJ_no_correction(dmat, alignment);
-
- break;
-
- default:
- fprintf(stderr, "Clearcut: Invalid distance correction model.\n");
- return(NULL);
- }
-
- return(dmat);
-}
-
-
-
-
-
-/*
- * NJ_no_correction() -
- *
- * Compute the distance matrix without correction
- * (straight percent ID)
- *
- * Resolve ambiguities in sequence data by skipping
- * those nucleotides/residues
- *
- */
-void
-NJ_no_correction(DMAT *dmat,
- NJ_alignment *alignment) {
-
- long int i, j;
- float pdiff;
-
- /* compute pairwise percent identity */
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
- pdiff = 1.0 - NJ_pw_percentid(alignment, i, j);
- dmat->val[NJ_MAP(i, j, dmat->size)] = pdiff;
- }
- }
-
- return;
-}
-
-
-
-
-/*
- * NJ_DNA_jc_correction() -
- *
- * Compute the distance matrix with jukes-cantor correction
- * and assign high distance if sequence divergence exceeds
- * 0.75
- *
- * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.),
- * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132.
- * New York: Academic Press
- *
- */
-void
-NJ_DNA_jc_correction(DMAT *dmat,
- NJ_alignment *alignment) {
-
- long int i, j;
- long int k;
- float d, cutoff, dist;
- long int residues;
-
- cutoff = 0.75;
-
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
-
- k = NJ_pw_differences(alignment, i, j, &residues);
- d = 1.0 - NJ_pw_percentid(alignment, i, j);
-
- if(d > cutoff) {
- dist = NJ_BIGDIST;
- } else {
- dist = (-0.75) * log(1.0 - (4.0/3.0)*d);
- }
-
- if(fabs(dist) < FLT_EPSILON) {
- dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0;
- } else {
- dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
- }
- }
- }
-
-
-
- return;
-}
-
-
-
-
-
-
-/*
- * NJ_PROTEIN_jc_correction() -
- *
- * This function performs modified jukes/cantor correction on
- * a protein alignment
- *
- * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.),
- * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132.
- * New York: Academic Press
- *
- */
-void
-NJ_PROTEIN_jc_correction(DMAT *dmat,
- NJ_alignment *alignment) {
-
- long int i, j;
- long int residues;
- long int diff;
- float dist, x;
-
-
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
-
- diff = NJ_pw_differences(alignment, i, j, &residues);
-
- if(!diff || !residues) {
- dist = 0.0;
- } else {
-
- dist = (float)diff/(float)residues;
- x = ((20.0/19.0)*dist);
-
- if(NJ_FLT_GT(x, 1.0)) {
- dist = NJ_BIGDIST;
- } else {
- dist = -(19.0/20.0) * log(1.0 - x);
- }
- }
-
- dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
- }
- }
-
- return;
-}
-
-
-
-
-
-
-/*
- * NJ_DNA_k2p_correction() -
- *
- * Correct a distance matrix using k2p correction using
- * cutoffs to avoid problems with logarithms.
- *
- * dist = -0.5ln(1-2P-Q) - 0.25ln(1-2Q)
- *
- * But due to the logarithms, this is only valid when
- *
- * (2P+Q <= 1) &&
- * (2Q <= 1)
- *
- * So assign arbitary distances when these constraints are
- * not strictly followed.
- *
- * Kimura, M. (1980), A simple method for estimating evolutionary
- * rates of base substitutions through comparative studies of
- * nucleotide sequences. J. Mol. Evol., 16, 111-120
- *
- */
-void
-NJ_DNA_k2p_correction(DMAT *dmat,
- NJ_alignment *alignment) {
-
- long int i, j;
- float P; /* proportion of transitions */
- float Q; /* proportion of transversions */
- long int nucleotides;
- long int transitions, transversions;
- float dist;
- float log_x = 0.0; /* the params for the first log */
- float log_y = 0.0; /* the params for the second log */
-
- int blowup; /* a flag to specify if we have a log blowup */
-
-
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
-
- blowup = 0;
-
- /* count the number of transitions and transversions */
- NJ_DNA_count_tt(alignment, i, j, &transitions, &transversions, &nucleotides);
-
- if(!nucleotides) { /* sequences have no non-ambiguous overlap in alignment */
- P = 0.0;
- Q = 0.0;
- } else {
- P = (float)transitions / (float)nucleotides;
- Q = (float)transversions / (float)nucleotides;
- }
-
- /* the first log blows up if 2*P+Q = 1.0 */
- if(NJ_FLT_EQ((2.0 * P + Q), 1.0)) {
- blowup = 1;
- } else {
- if( NJ_FLT_LT(1.0 - 2.0*P - Q, 0.0) ) {
- blowup = 1;
- } else {
- log_x = log(1.0 - 2.0*P - Q);
- }
- }
-
- /* the second log blows up if 2*Q >= 1.0 */
- if( NJ_FLT_EQ((2.0 * Q), 1.0) ||
- NJ_FLT_GT((2.0 * Q), 1.0) ) {
- blowup = 1;
- } else {
- log_y = log(1.0 - 2.0*Q);
- }
-
- /* if our logarithms blow up, we just set the distance to the max */
- if(blowup) {
- dist = NJ_BIGDIST;
- } else {
- dist = (-0.5)*log_x - 0.25*log_y;
- }
-
- if(fabs(dist) < FLT_EPSILON) {
- dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0;
- } else {
- dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
- }
- }
- }
-
- return;
-}
-
-
-
-
-/*
- * NJ_PROTEIN_kimura_correction() -
- *
- * Perform Kimura correction for distances derived from protein
- * alignments.
- *
- * Kimura, M. (1983), The Neutral Theory of Molecular Evolution.
- * p. 75., Cambridge University Press, Cambridge, England
- *
- */
-void
-NJ_PROTEIN_kimura_correction(DMAT *dmat,
- NJ_alignment *alignment) {
-
- long int i, j;
- long int residues;
- long int diff;
- float dist;
-
-
- printf("NJ_PROTEIN_kimura_correction()\n");
-
- for(i=0;i<dmat->size;i++) {
- for(j=i+1;j<dmat->size;j++) {
- diff = NJ_pw_differences(alignment, i, j, &residues);
-
- if(!diff || !residues) {
- dist = 0.0;
- } else {
- dist = (float)diff/(float)residues;
- }
-
- if(NJ_FLT_LT(dist, 0.75)) {
- if(NJ_FLT_GT(dist, 0.0) ) {
- dist = -log(1.0 - dist - (dist * dist/5.0) );
- }
- } else {
- if(NJ_FLT_GT(dist, 0.93) ) {
- dist = 10.0;
- } else {
- dist = (float)NJ_dayhoff[ (int)((dist*1000.0)-750.0) ] / 100.0 ;
- }
- }
-
- dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
- }
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_DNA_count_tt() -
- *
- * Count the number of transitions and transversions
- * between two aligned DNA sequences
- *
- * This routine automatically skips ambiguities when
- * counting transitions and transversions.
- *
- */
-void
-NJ_DNA_count_tt(NJ_alignment *alignment,
- long int x,
- long int y,
- long int *transitions,
- long int *transversions,
- long int *residues) {
-
- long int tmp_transitions = 0;
- long int tmp_transversions = 0;
- long int tmp_residues = 0;
- char a, b;
- long int i;
-
- for(i=0;i<alignment->length;i++) {
-
- a = toupper(alignment->data[x*alignment->length+i]);
- b = toupper(alignment->data[y*alignment->length+i]);
-
- if( (a == 'A' && b == 'T') ||
- (a == 'T' && b == 'A') ||
- (a == 'A' && b == 'C') ||
- (a == 'C' && b == 'A') ||
- (a == 'T' && b == 'G') ||
- (a == 'G' && b == 'T') ||
- (a == 'C' && b == 'G') ||
- (a == 'G' && b == 'C') ) {
- tmp_transversions++;
- }
-
- if( (a == 'C' && b == 'T') ||
- (a == 'T' && b == 'C') ||
- (a == 'G' && b == 'A') ||
- (a == 'A' && b == 'G') ) {
- tmp_transitions++;
- }
-
- /* count the number of residues */
- if(a != NJ_AMBIGUITY_CHAR &&
- b != NJ_AMBIGUITY_CHAR ) {
- tmp_residues++;
- }
-
- }
-
- *transitions = tmp_transitions;
- *transversions = tmp_transversions;
-
- if(residues) {
- *residues = tmp_residues;
- }
-
- return;
-}
-
-
-
-
-
-/*
- * NJ_pw_percentid() -
- *
- * Given an alignment and a specification
- * for two rows, compute the pairwise
- * percent identity between the two
- *
- */
-float
-NJ_pw_percentid(NJ_alignment *alignment,
- long int x,
- long int y) {
-
- float pid;
- long int i;
- long int residues;
- long int same;
- char c1, c2;
-
- residues = 0;
- same = 0;
- for(i=0;i<alignment->length;i++) {
-
- c1 = alignment->data[x*alignment->length+i];
- c2 = alignment->data[y*alignment->length+i];
-
- if( c1 != NJ_AMBIGUITY_CHAR ||
- c2 != NJ_AMBIGUITY_CHAR ) {
-
- residues++;
-
- if(c1 == c2) {
- same++;
- }
- }
-
- }
-
- pid = (float)same/(float)residues;
-
- return(pid);
-}
-
-
-
-/*
- * NJ_pw_differences() -
- *
- * Given an alignment and a specification
- * for two rows in the alignment, compute the
- * number of differences between the two sequences
- *
- * With respect to ambiguity codes, we will want to
- * disregard those sites entirely in our count.
- *
- */
-long int
-NJ_pw_differences(NJ_alignment *alignment,
- long int x,
- long int y,
- long int *residues) {
-
- long int i;
- long int diff;
- char c1, c2;
- long int tmp_residues;
-
- diff = 0;
- tmp_residues = 0;
- for(i=0;i<alignment->length;i++) {
-
- c1 = alignment->data[x*alignment->length+i];
- c2 = alignment->data[y*alignment->length+i];
-
- if( c1 != NJ_AMBIGUITY_CHAR ||
- c2 != NJ_AMBIGUITY_CHAR ) {
-
- tmp_residues++;
-
- if(c1 != c2) {
- diff++;
- }
- }
-
- }
-
- *residues = tmp_residues;
-
- return(diff);
-}
-
-
-
-
-
-
--- /dev/null
+/*
+ * dist.c
+ *
+ * $Id$
+ *
+ *
+ *****************************************************************************
+ *
+ * Copyright (c) 2004, Luke Sheneman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + The names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************
+ *
+ * Compute a distance matrix given a set of sequences
+ *
+ *****************************************************************************
+ *
+ * AUTHOR:
+ *
+ * Luke Sheneman
+ * sheneman@cs.uidaho.edu
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "common.h"
+#include "dayhoff.h"
+#include "fasta.h"
+#include "distclearcut.h"
+
+
+
+
+/*
+ * NJ_build_distance_matrix() -
+ *
+ * Given a filename for an alignment, read the alignment
+ * into memory and then compute the distance matrix
+ * using the appropriate correction model
+ */
+DMAT *
+NJ_build_distance_matrix(NJ_ARGS *nj_args) {
+
+ DMAT *dmat;
+ NJ_alignment *alignment;
+
+ /* Read an alignment in FASTA format */
+ alignment =
+ NJ_read_fasta(nj_args);
+
+ if(!alignment) {
+ return(NULL);
+ }
+
+ /*
+ * Given a global multiple sequence alignment (MSA) and
+ * a specified distance correction model, compute a
+ * corrected distance matrix
+ *
+ * From proteins, we may want to allow users to specify
+ * a substitution matrix (feature)
+ */
+ dmat =
+ NJ_compute_dmat(nj_args,
+ alignment);
+
+ // NJ_print_taxanames(dmat);
+
+ if(!dmat) {
+ fprintf(stderr, "Clearcut: Error computing distance matrix\n");
+ }
+
+ /* now free the memory associated with the alignment */
+ NJ_free_alignment(alignment);
+
+ return(dmat);
+}
+
+
+
+
+
+/*
+ * NJ_compute_dmat() -
+ *
+ * Given an alignment and a correction model, compute the
+ * distance matrix and return it
+ *
+ */
+DMAT *
+NJ_compute_dmat(NJ_ARGS *nj_args,
+ NJ_alignment *alignment) {
+
+ DMAT *dmat;
+ long int i;
+
+
+ /* allocate distance matrix here */
+ dmat = (DMAT *)calloc(1, sizeof(DMAT));
+ if(!dmat) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
+ return(NULL);
+ }
+
+ dmat->ntaxa = alignment->nseq;
+ dmat->size = alignment->nseq;
+
+ /* allocate memory to hold the taxa names */
+ dmat->taxaname = (char **)calloc(alignment->nseq, sizeof(char *));
+ if(!dmat->taxaname) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
+ return(NULL);
+ }
+
+ /* copy sequence titles */
+ for(i=0;i<alignment->nseq;i++) {
+ dmat->taxaname[i] = (char *)calloc(strlen(alignment->titles[i])+1, sizeof(char));
+ if(!dmat->taxaname[i]) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
+ return(NULL);
+ }
+
+ strncpy(dmat->taxaname[i], alignment->titles[i], strlen(alignment->titles[i]));
+ }
+
+ /* allocate val matrix in dmat */
+ dmat->val = (float *)calloc(dmat->ntaxa*dmat->ntaxa, sizeof(float));
+ if(!dmat->val) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_compute_dmat()\n");
+ return(NULL);
+ }
+
+ /* now lets allocate space for the r and r2 columns */
+ dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float));
+ dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float));
+
+ /* track some memory addresses */
+ dmat->rhandle = dmat->r;
+ dmat->r2handle = dmat->r2;
+ dmat->valhandle = dmat->val;
+
+ /* apply model correction to matrix */
+ switch(nj_args->correction_model) {
+
+ case NJ_MODEL_JUKES:
+
+ if(nj_args->dna_flag) {
+ NJ_DNA_jc_correction(dmat, alignment);
+ } else if(nj_args->protein_flag) {
+ NJ_PROTEIN_jc_correction(dmat, alignment);
+ } else {
+ fprintf(stderr, "Clearcut: Need to know sequence type for Jukes-Cantor model correction.\n");
+ return(NULL);
+ }
+
+ break;
+
+ case NJ_MODEL_KIMURA:
+
+ if(nj_args->dna_flag) {
+ NJ_DNA_k2p_correction(dmat, alignment);
+ } else if(nj_args->protein_flag) {
+ NJ_PROTEIN_kimura_correction(dmat, alignment);
+ } else {
+ fprintf(stderr, "Clearcut: Need to know sequence type for Kimura model correction.\n");
+ return(NULL);
+ }
+
+ break;
+
+ case NJ_MODEL_NONE:
+
+ NJ_no_correction(dmat, alignment);
+
+ break;
+
+ default:
+ fprintf(stderr, "Clearcut: Invalid distance correction model.\n");
+ return(NULL);
+ }
+
+ return(dmat);
+}
+
+
+
+
+
+/*
+ * NJ_no_correction() -
+ *
+ * Compute the distance matrix without correction
+ * (straight percent ID)
+ *
+ * Resolve ambiguities in sequence data by skipping
+ * those nucleotides/residues
+ *
+ */
+void
+NJ_no_correction(DMAT *dmat,
+ NJ_alignment *alignment) {
+
+ long int i, j;
+ float pdiff;
+
+ /* compute pairwise percent identity */
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+ pdiff = 1.0 - NJ_pw_percentid(alignment, i, j);
+ dmat->val[NJ_MAP(i, j, dmat->size)] = pdiff;
+ }
+ }
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_DNA_jc_correction() -
+ *
+ * Compute the distance matrix with jukes-cantor correction
+ * and assign high distance if sequence divergence exceeds
+ * 0.75
+ *
+ * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.),
+ * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132.
+ * New York: Academic Press
+ *
+ */
+void
+NJ_DNA_jc_correction(DMAT *dmat,
+ NJ_alignment *alignment) {
+
+ long int i, j;
+ long int k;
+ float d, cutoff, dist;
+ long int residues;
+
+ cutoff = 0.75;
+
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+
+ k = NJ_pw_differences(alignment, i, j, &residues);
+ d = 1.0 - NJ_pw_percentid(alignment, i, j);
+
+ if(d > cutoff) {
+ dist = NJ_BIGDIST;
+ } else {
+ dist = (-0.75) * log(1.0 - (4.0/3.0)*d);
+ }
+
+ if(fabs(dist) < FLT_EPSILON) {
+ dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0;
+ } else {
+ dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
+ }
+ }
+ }
+
+
+
+ return;
+}
+
+
+
+
+
+
+/*
+ * NJ_PROTEIN_jc_correction() -
+ *
+ * This function performs modified jukes/cantor correction on
+ * a protein alignment
+ *
+ * Jukes, T.H. (1969), Evolution of protein molecules. In H.N. Munro (Ed.),
+ * Mammalian Protein Metabolism, Volume III, Chapter 24, pp. 21-132.
+ * New York: Academic Press
+ *
+ */
+void
+NJ_PROTEIN_jc_correction(DMAT *dmat,
+ NJ_alignment *alignment) {
+
+ long int i, j;
+ long int residues;
+ long int diff;
+ float dist, x;
+
+
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+
+ diff = NJ_pw_differences(alignment, i, j, &residues);
+
+ if(!diff || !residues) {
+ dist = 0.0;
+ } else {
+
+ dist = (float)diff/(float)residues;
+ x = ((20.0/19.0)*dist);
+
+ if(NJ_FLT_GT(x, 1.0)) {
+ dist = NJ_BIGDIST;
+ } else {
+ dist = -(19.0/20.0) * log(1.0 - x);
+ }
+ }
+
+ dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
+ }
+ }
+
+ return;
+}
+
+
+
+
+
+
+/*
+ * NJ_DNA_k2p_correction() -
+ *
+ * Correct a distance matrix using k2p correction using
+ * cutoffs to avoid problems with logarithms.
+ *
+ * dist = -0.5ln(1-2P-Q) - 0.25ln(1-2Q)
+ *
+ * But due to the logarithms, this is only valid when
+ *
+ * (2P+Q <= 1) &&
+ * (2Q <= 1)
+ *
+ * So assign arbitary distances when these constraints are
+ * not strictly followed.
+ *
+ * Kimura, M. (1980), A simple method for estimating evolutionary
+ * rates of base substitutions through comparative studies of
+ * nucleotide sequences. J. Mol. Evol., 16, 111-120
+ *
+ */
+void
+NJ_DNA_k2p_correction(DMAT *dmat,
+ NJ_alignment *alignment) {
+
+ long int i, j;
+ float P; /* proportion of transitions */
+ float Q; /* proportion of transversions */
+ long int nucleotides;
+ long int transitions, transversions;
+ float dist;
+ float log_x = 0.0; /* the params for the first log */
+ float log_y = 0.0; /* the params for the second log */
+
+ int blowup; /* a flag to specify if we have a log blowup */
+
+
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+
+ blowup = 0;
+
+ /* count the number of transitions and transversions */
+ NJ_DNA_count_tt(alignment, i, j, &transitions, &transversions, &nucleotides);
+
+ if(!nucleotides) { /* sequences have no non-ambiguous overlap in alignment */
+ P = 0.0;
+ Q = 0.0;
+ } else {
+ P = (float)transitions / (float)nucleotides;
+ Q = (float)transversions / (float)nucleotides;
+ }
+
+ /* the first log blows up if 2*P+Q = 1.0 */
+ if(NJ_FLT_EQ((2.0 * P + Q), 1.0)) {
+ blowup = 1;
+ } else {
+ if( NJ_FLT_LT(1.0 - 2.0*P - Q, 0.0) ) {
+ blowup = 1;
+ } else {
+ log_x = log(1.0 - 2.0*P - Q);
+ }
+ }
+
+ /* the second log blows up if 2*Q >= 1.0 */
+ if( NJ_FLT_EQ((2.0 * Q), 1.0) ||
+ NJ_FLT_GT((2.0 * Q), 1.0) ) {
+ blowup = 1;
+ } else {
+ log_y = log(1.0 - 2.0*Q);
+ }
+
+ /* if our logarithms blow up, we just set the distance to the max */
+ if(blowup) {
+ dist = NJ_BIGDIST;
+ } else {
+ dist = (-0.5)*log_x - 0.25*log_y;
+ }
+
+ if(fabs(dist) < FLT_EPSILON) {
+ dmat->val[NJ_MAP(i, j, dmat->size)] = 0.0;
+ } else {
+ dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
+ }
+ }
+ }
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_PROTEIN_kimura_correction() -
+ *
+ * Perform Kimura correction for distances derived from protein
+ * alignments.
+ *
+ * Kimura, M. (1983), The Neutral Theory of Molecular Evolution.
+ * p. 75., Cambridge University Press, Cambridge, England
+ *
+ */
+void
+NJ_PROTEIN_kimura_correction(DMAT *dmat,
+ NJ_alignment *alignment) {
+
+ long int i, j;
+ long int residues;
+ long int diff;
+ float dist;
+
+
+ printf("NJ_PROTEIN_kimura_correction()\n");
+
+ for(i=0;i<dmat->size;i++) {
+ for(j=i+1;j<dmat->size;j++) {
+ diff = NJ_pw_differences(alignment, i, j, &residues);
+
+ if(!diff || !residues) {
+ dist = 0.0;
+ } else {
+ dist = (float)diff/(float)residues;
+ }
+
+ if(NJ_FLT_LT(dist, 0.75)) {
+ if(NJ_FLT_GT(dist, 0.0) ) {
+ dist = -log(1.0 - dist - (dist * dist/5.0) );
+ }
+ } else {
+ if(NJ_FLT_GT(dist, 0.93) ) {
+ dist = 10.0;
+ } else {
+ dist = (float)NJ_dayhoff[ (int)((dist*1000.0)-750.0) ] / 100.0 ;
+ }
+ }
+
+ dmat->val[NJ_MAP(i, j, dmat->size)] = dist;
+ }
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_DNA_count_tt() -
+ *
+ * Count the number of transitions and transversions
+ * between two aligned DNA sequences
+ *
+ * This routine automatically skips ambiguities when
+ * counting transitions and transversions.
+ *
+ */
+void
+NJ_DNA_count_tt(NJ_alignment *alignment,
+ long int x,
+ long int y,
+ long int *transitions,
+ long int *transversions,
+ long int *residues) {
+
+ long int tmp_transitions = 0;
+ long int tmp_transversions = 0;
+ long int tmp_residues = 0;
+ char a, b;
+ long int i;
+
+ for(i=0;i<alignment->length;i++) {
+
+ a = toupper(alignment->data[x*alignment->length+i]);
+ b = toupper(alignment->data[y*alignment->length+i]);
+
+ if( (a == 'A' && b == 'T') ||
+ (a == 'T' && b == 'A') ||
+ (a == 'A' && b == 'C') ||
+ (a == 'C' && b == 'A') ||
+ (a == 'T' && b == 'G') ||
+ (a == 'G' && b == 'T') ||
+ (a == 'C' && b == 'G') ||
+ (a == 'G' && b == 'C') ) {
+ tmp_transversions++;
+ }
+
+ if( (a == 'C' && b == 'T') ||
+ (a == 'T' && b == 'C') ||
+ (a == 'G' && b == 'A') ||
+ (a == 'A' && b == 'G') ) {
+ tmp_transitions++;
+ }
+
+ /* count the number of residues */
+ if(a != NJ_AMBIGUITY_CHAR &&
+ b != NJ_AMBIGUITY_CHAR ) {
+ tmp_residues++;
+ }
+
+ }
+
+ *transitions = tmp_transitions;
+ *transversions = tmp_transversions;
+
+ if(residues) {
+ *residues = tmp_residues;
+ }
+
+ return;
+}
+
+
+
+
+
+/*
+ * NJ_pw_percentid() -
+ *
+ * Given an alignment and a specification
+ * for two rows, compute the pairwise
+ * percent identity between the two
+ *
+ */
+float
+NJ_pw_percentid(NJ_alignment *alignment,
+ long int x,
+ long int y) {
+
+ float pid;
+ long int i;
+ long int residues;
+ long int same;
+ char c1, c2;
+
+ residues = 0;
+ same = 0;
+ for(i=0;i<alignment->length;i++) {
+
+ c1 = alignment->data[x*alignment->length+i];
+ c2 = alignment->data[y*alignment->length+i];
+
+ if( c1 != NJ_AMBIGUITY_CHAR ||
+ c2 != NJ_AMBIGUITY_CHAR ) {
+
+ residues++;
+
+ if(c1 == c2) {
+ same++;
+ }
+ }
+
+ }
+
+ pid = (float)same/(float)residues;
+
+ return(pid);
+}
+
+
+
+/*
+ * NJ_pw_differences() -
+ *
+ * Given an alignment and a specification
+ * for two rows in the alignment, compute the
+ * number of differences between the two sequences
+ *
+ * With respect to ambiguity codes, we will want to
+ * disregard those sites entirely in our count.
+ *
+ */
+long int
+NJ_pw_differences(NJ_alignment *alignment,
+ long int x,
+ long int y,
+ long int *residues) {
+
+ long int i;
+ long int diff;
+ char c1, c2;
+ long int tmp_residues;
+
+ diff = 0;
+ tmp_residues = 0;
+ for(i=0;i<alignment->length;i++) {
+
+ c1 = alignment->data[x*alignment->length+i];
+ c2 = alignment->data[y*alignment->length+i];
+
+ if( c1 != NJ_AMBIGUITY_CHAR ||
+ c2 != NJ_AMBIGUITY_CHAR ) {
+
+ tmp_residues++;
+
+ if(c1 != c2) {
+ diff++;
+ }
+ }
+
+ }
+
+ *residues = tmp_residues;
+
+ return(diff);
+}
+
+
+
+
+
+
+++ /dev/null
-/*
- * dmat.c
- *
- * $Id$
- *
- *****************************************************************************
- *
- * Copyright (c) 2004, Luke Sheneman
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * + Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * + Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * + The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************
- *
- * Distance matrix parser
- *
- *****************************************************************************
- *
- * AUTHOR:
- *
- * Luke Sheneman
- * sheneman@cs.uidaho.edu
- *
- */
-
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <float.h>
-#include <errno.h>
-#include <string.h>
-
-
-
-#include "common.h"
-#include "clearcut.h"
-
-#include "dmat.h"
-
-
-
-
-
-/*
- *
- * NJ_is_alpha() - determine if character is an alphabetic character
- *
- * INPUT:
- * ------
- * c -- character to test
- *
- * RETURN:
- * -------
- * int -- 1 if character is alphabetic (A-Z || a-z)
- * 0 if character is NOT alphabetic
- *
- */
-static inline
-int
-NJ_is_alpha(char c) {
-
- if( (c >= 'A' && c <= 'Z') ||
- (c >= 'a' && c <= 'z') ) {
- return(1);
- } else {
- return(0);
- }
-}
-
-
-
-/*
- *
- * NJ_is_whitespace() - determine if character is a whitespace character
- *
- * INPUT:
- * ------
- * c -- character to test
- *
- * RETURN:
- * -------
- * int -- 1 if character is whitespace (space, tab, CR, LF)
- * 0 if character is NOT whitespace
- *
- */
-static inline
-int
-NJ_is_whitespace(char c) {
-
- if( c == ' ' || /* space */
- c == '\n' || /* newline */
- c == '\r' || /* carriage-return */
- c == '\v' || /* vertical tab */
- c == '\f' || /* form feed */
- c == '\t' ) { /* horizontal tab */
- return(1);
- } else {
- return(0);
- }
-
-}
-
-
-
-
-/*
- *
- * NJ_is_number() - determine if character is a number
- *
- * INPUT:
- * ------
- * c -- character to test
- *
- * RETURN:
- * -------
- * int -- 1 if character is a number (0-9)
- * 0 if character is NOT a number
- *
- */
-static inline
-int
-NJ_is_number(char c) {
- if(c >= '0' && c <= '9') {
- return(1);
- } else {
- return(0);
- }
-}
-
-
-
-/*
- * NJ_is_distance() - check if string is a properly formatted distance value
- *
- */
-static inline
-int
-NJ_is_distance(char *token) {
-
- int i;
- char c;
- int exponent_state;
- int expsign_state;
- int dpoint_state;
-
- /* if token is NULL return failure */
- if(!token) {
- return(0);
- }
-
- exponent_state = 0;
- expsign_state = 0;
- dpoint_state = 0;
-
- /* The first character must be a number, a decimal point or a sign */
- c = token[0];
- if(!NJ_is_number(c) &&
- c != '.' &&
- c != '-' &&
- c != '+' ) {
-
- goto BAD;
- }
-
- /*
- * if the first character is not a number, and string is only one
- * character long, then we return failure.
- */
- if(strlen(token) == 1) {
- if(!NJ_is_number(c)) {
- goto BAD;
- }
- }
-
- for(i=0;i<strlen(token);i++) {
-
- c = token[i];
-
- /* make sure that all chars in dist string are in list of valid chars */
- if(!NJ_is_number(c) &&
- c != '.' &&
- c != '-' &&
- c != '+' &&
- c != 'e' &&
- c != 'E' ) {
-
- goto BAD;
- }
-
- /* not the first char and we are not in exponent state but read (+,-) */
- if(i>0 && !exponent_state) {
- if(c == '-' ||
- c == '+') {
- goto BAD;
- }
- }
-
- /* if we are in the exponent state, and we've already seen a sign */
- if(exponent_state && expsign_state) {
- if(c == '-' ||
- c == '+') {
- goto BAD;
- }
- }
-
- /* if we are in the exponent state and we see a decimal point */
- if(exponent_state) {
- if(c == '.') {
- goto BAD;
- }
- }
-
- /* if we are in the exponent state and see another e or E */
- if(exponent_state) {
- if(c == 'e' ||
- c == 'E') {
- goto BAD;
- }
- }
-
- /* if we are dpoint_state and see another decimal point */
- if(dpoint_state) {
- if(c == '.') {
- goto BAD;
- }
- }
-
-
- /* enter the exponent state if we need to */
- if(!exponent_state) {
- if(c == 'e' ||
- c == 'E') {
- exponent_state = 1;
- }
- }
-
- /* enter the expsign_state if we need to */
- if(exponent_state && !expsign_state) {
- if(c == '-' ||
- c == '+') {
- expsign_state = 1;
- }
- }
-
- /* if not in dpoint state and we see a dpoint */
- if(!dpoint_state) {
- if(c == '.') {
- dpoint_state = 1;
- }
- }
-
- }
-
- /* the token must end in a number char */
- if(!NJ_is_number(token[strlen(token)-1])) {
- goto BAD;
- }
-
- /* token is a valid numerical distance */
- return(1);
-
- BAD:
-
- /* token is invalid distance format */
- return(0);
-}
-
-
-
-
-/*
- * NJ_is_label() -
- *
- * Simply, if token is not a valid number, then it is a name
- *
- */
-static inline
-int
-NJ_is_label(char *token) {
- if(NJ_is_distance(token)) {
- return(0);
- } else {
- return(1);
- }
-}
-
-
-
-/*
- * NJ_get_token() - get a token from an input stream
- *
- */
-static inline
-int
-NJ_get_token(FILE *fp,
- NJ_DIST_TOKEN *token) {
-
- char c;
- int index;
-
- c = fgetc(fp);
- if(feof(fp)) {
- token->type = NJ_EOF_STATE;
- return(token->type);
- }
-
- if(NJ_is_whitespace(c)) {
- token->buf[0] = c;
- token->buf[1] = '\0';
- token->type = NJ_WS_STATE;
-
- return NJ_WS_STATE;
- }
-
- index = 0;
- while(!NJ_is_whitespace(c)) {
-
- /* reallocate our buffer if necessary */
- if(index >= token->bufsize) {
- token->bufsize *= 2;
- token->buf = (char *)realloc(token->buf, token->bufsize*sizeof(char));
- if(!token->buf) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_get_token()\n");
- exit(-1);
- }
- }
-
- token->buf[index++] = c;
-
- c = fgetc(fp);
- if(feof(fp)) {
- token->type = NJ_EOF_STATE;
- break;
- }
- }
-
- token->buf[index] = '\0';
-
- if(token->type != NJ_EOF_STATE) {
-
- if(NJ_is_distance(token->buf)) {
- token->type = NJ_FLOAT_STATE;
- } else {
- token->type = NJ_NAME_STATE;
- }
-
- }
-
- return(token->type);
-}
-
-
-
-
-
-
-/*
- * NJ_parse_distance_matrix() -- Takes a filename and returns a distance matrix
- *
- *
- * INPUT:
- * ------
- * nj_args -- a pointer to a structure containing the command-line arguments
- *
- * OUTPUT:
- * -------
- * <DMAT *> -- NULL (failure)
- * -- A pointer to a populated distance matrix
- *
- * DESCRIPTION:
- * ------------
- * This function implements a simple state machine to parse a distance matrix
- * in approximate PHYLIP format. This function auto-detects whether the
- * distance matrix is in upper, lower, or fully-symmetric format and handles
- * it accordingly. For full/symmetric matrices, values must be symmetric
- * around the diagonal, which is required to be zeroes. Names and values must
- * be separated by whitespace (space, tab, newlines, etc.). Taxon labels can
- * include numbers, but must start with non-numerical symbols.
- *
- *
- * *** UPPER FORMAT EXAMPLE ***
- *
- * 4
- * seq1 0.2 0.3 0.1
- * seq2 0.2 0.3
- * seq3 0.1
- * seq4
- *
- * *** LOWER FORMAT EXAMPLE ***
- *
- * 4
- * seq1
- * seq2 0.3
- * seq3 0.2 0.4
- * seq4 0.3 0.1 0.3
- *
- * *** SYMMETRIC (FULL) EXAMPLE ***
- *
- * 4
- * seq1 0.0 0.3 0.5 0.3
- * seq2 0.3 0.0 0.1 0.2
- * seq3 0.5 0.1 0.0 0.9
- * seq4 0.3 0.2 0.9 0.0
- *
- * Values in the distance matrix can be positive or negative, integers or
- * real values. Values can also be parsed in exponential notation form.
- *
- */
-DMAT *
-NJ_parse_distance_matrix(NJ_ARGS *nj_args) {
-
- DMAT *dmat = NULL;
- FILE *fp = NULL;
- NJ_DIST_TOKEN *token = NULL;
-
- int state, dmat_type;
- int row;
- int fltcnt;
- int x, y, i;
- int numvalread;
- int expectedvalues = -1;
- float val;
- int first_state = 0;
-
-
- /* allocate our distance matrix and token structure */
- dmat = (DMAT *)calloc(1, sizeof(DMAT));
- token = (NJ_DIST_TOKEN *)calloc(1, sizeof(NJ_DIST_TOKEN));
- if(token) {
- token->bufsize = NJ_INITIAL_BUFSIZE;
- token->buf = (char *)calloc(token->bufsize, sizeof(char));
- }
- if(!dmat || !token || !token->buf) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
- goto XIT_BAD;
- }
-
- /* open distance matrix file here */
- if(nj_args->stdin_flag) {
- fp = stdin;
- } else {
- fp = fopen(nj_args->infilename, "r");
- if(!fp) {
- fprintf(stderr, "Clearcut: Could not open distance matrix: %s\n", nj_args->infilename);
- perror("Clearcut");
- goto XIT_BAD;
- }
- }
-
- /* get the number of taxa in this file */
- fscanf(fp, "%ld\n", &dmat->ntaxa);
- if(dmat->ntaxa < 2) {
- fprintf(stderr, "Clearcut: Invalid number of taxa in distance matrix\n");
-
- goto XIT_BAD;
- }
-
- /* set our initial working size according to the # of taxa */
- dmat->size = dmat->ntaxa;
-
- /* allocate space for the distance matrix values here */
- dmat->val =
- (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float));
- if(!dmat->val) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
- goto XIT_BAD;
- }
-
- /* taxa names */
- dmat->taxaname = (char **)calloc(dmat->ntaxa, sizeof(char *));
- if(!dmat->taxaname) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
- goto XIT_BAD;
- }
-
- /* set the initial state of our state machine */
- dmat_type = NJ_PARSE_UNKNOWN;
- row = -1;
- fltcnt = 0;
- numvalread = 0;
-
-
- /* read the input one character at a time to drive simple state machine */
- state = NJ_get_token(fp, token);
- while(state != NJ_EOF_STATE) {
-
- switch(state) {
-
- case NJ_NAME_STATE:
-
- if(first_state == 0) {
- first_state = 1;
- }
-
- row++;
-
- if(row > 0 && dmat_type == NJ_PARSE_UNKNOWN) {
-
- if(fltcnt == dmat->ntaxa) {
-
- dmat_type = NJ_PARSE_SYMMETRIC;
- expectedvalues = dmat->ntaxa * dmat->ntaxa;
-
- } else if (fltcnt == dmat->ntaxa-1) {
-
- dmat_type = NJ_PARSE_UPPER;
- expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2;
-
- /* shift everything in first row by one char */
- for(i=dmat->ntaxa-2;i>=0;i--) {
- dmat->val[i+1] = dmat->val[i];
- }
-
- } else if (fltcnt == 0) {
-
- dmat_type = NJ_PARSE_LOWER;
- expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2;
-
- } else {
- goto XIT_BAD;
- }
- }
-
- if(row >= dmat->ntaxa) {
- goto XIT_BAD;
- }
-
- /* allocate space for this taxon label */
- dmat->taxaname[row] = (char *)calloc(strlen(token->buf)+1, sizeof(char));
- if(!dmat->taxaname[row]) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
- goto XIT_BAD;
- }
-
- strcpy(dmat->taxaname[row], token->buf);
-
- fltcnt = 0;
-
- break;
-
-
- case NJ_FLOAT_STATE:
-
- if(first_state == 0) {
- goto XIT_BAD;
- }
-
- val = atof(token->buf);
- if(errno) {
- fprintf(stderr, "Clearcut: Distance value out-of-range.\n");
- goto XIT_BAD;
- }
-
- x = row;
- y = fltcnt;
-
- switch(dmat_type) {
-
- case NJ_PARSE_UNKNOWN:
-
- dmat->val[NJ_MAP(x, y, dmat->size)] = val;
-
- break;
-
- case NJ_PARSE_SYMMETRIC:
-
- if(fltcnt >= dmat->ntaxa) {
- fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
- goto XIT_BAD;
- }
-
- if(x < y) {
- dmat->val[NJ_MAP(x, y, dmat->size)] = val;
- } else if(x > y) {
- if(!NJ_FLT_EQ(val, dmat->val[NJ_MAP(y, x, dmat->size)])) {
- fprintf(stderr, "Clearcut: Full matrices must be symmetric.\n");
- goto XIT_BAD;
- }
- } else {
- if(!NJ_FLT_EQ(val, 0.0)) {
- fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n");
- goto XIT_BAD;
-
- }
- }
-
- break;
-
- case NJ_PARSE_UPPER:
-
- if(fltcnt > dmat->ntaxa-row) {
- fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
- goto XIT_BAD;
- }
-
- dmat->val[NJ_MAP(x, x+y+1, dmat->size)] = val;
-
- break;
-
- case NJ_PARSE_LOWER:
-
- if(fltcnt > row-1) {
- fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
- goto XIT_BAD;
- }
-
- dmat->val[NJ_MAP(y, x, dmat->size)] = val;
-
- break;
-
- default:
-
- goto XIT_BAD;
-
- break;
- }
-
- fltcnt++;
- numvalread++;
-
- break;
-
- case NJ_WS_STATE:
-
- break;
-
- case NJ_EOF_STATE:
-
- if(first_state == 0) {
- goto XIT_BAD;
- }
-
- break;
-
- default:
-
- fprintf(stderr, "Clearcut: Unknown state in distance matrix parser.\n");
- break;
-
- }
-
- /* get next token from stream */
- state = NJ_get_token(fp, token);
- }
-
-
- /*
- * At the end, if we have not read the number of values that we predicted
- * we would need, then there was a problem and we need to punt.
- */
- if(numvalread != expectedvalues) {
- fprintf(stderr, "Clearcut: Incorrect number of values in the distance matrix.\n");
- goto XIT_BAD;
- }
-
- /* special check to make sure first value read is 0.0 */
- if(dmat_type == NJ_PARSE_SYMMETRIC) {
- if(!NJ_FLT_EQ(dmat->val[NJ_MAP(0, 0, dmat->size)], 0.0)) {
- fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n");
- goto XIT_BAD;
- }
- }
-
-
- /* now lets allocate space for the r and r2 columns */
- dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float));
- dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float));
- if(!dmat->r || !dmat->r2) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
- goto XIT_BAD;
- }
-
- /* track some memory addresses */
- dmat->rhandle = dmat->r;
- dmat->r2handle = dmat->r2;
- dmat->valhandle = dmat->val;
-
- /* close matrix file here */
- if(!nj_args->stdin_flag) {
- fclose(fp);
- }
-
- if(token) {
- if(token->buf) {
- free(token->buf);
- }
- free(token);
- }
-
- return(dmat);
-
-
-
- /* clean up our partial progress */
- XIT_BAD:
-
- if(fp) {
- fprintf(stderr, "Clearcut: Syntax error in distance matrix at offset %ld.\n", ftell(fp));
- }
-
- /* close matrix file here */
- if(!nj_args->stdin_flag) {
- if(fp) {
- fclose(fp);
- }
- }
-
- /* if we have a valid dmat (partial or complete), we need to free it */
- if(dmat) {
- NJ_free_dmat(dmat);
- }
-
- if(token) {
- if(token->buf) {
- free(token->buf);
- }
- free(token);
- }
-
- return(NULL);
-}
-
-
-
-
-
-
-
-/*
- * NJ_output_matrix() - Output a distance matrix to the specified file
- *
- *
- * INPUTS:
- * -------
- * nj_args -- a pointer to a data structure holding the command-line args
- * dmat -- a pointer to a distance matrix
- *
- *
- * RETURNS:
- * --------
- * NOTHING
- *
- *
- * DESCRIPTION:
- * ------------
- * If the appropriate flag was specified in the command-line, this function
- * now outputs the parsed or computed distance matrix to a file. This
- * can be useful if generating a distance matrix was the primary goal of
- * running the program, or if one wanted to debug and/or verify the
- * correctness of the program.
- *
- * Currently this function outputs full/symmetric matrices only.
- *
- */
-void
-NJ_output_matrix(NJ_ARGS *nj_args,
- DMAT *dmat) {
-
- FILE *fp = NULL;
- long int i, j;
-
-
-
- /* if we haven't specieid matrixout, return immediately */
- if(!nj_args->matrixout) {
- return;
- }
-
- /* open the specified matrix file for writing */
- fp = fopen(nj_args->matrixout, "w");
- if(!fp) {
- fprintf(stderr, "Clearcut: Could not open matrix file %s for output.\n", nj_args->matrixout);
- return;
- }
-
- /* output the number of taxa in the matrix */
- fprintf(fp, " %ld\n", dmat->size);
-
- fprintf(fp, "%s\n", dmat->taxaname[0]); // print the first taxon name outside of the main loop
-
- for(i=1;i<dmat->size;i++) {
-
- /* output taxaname */
- fprintf(fp, "%s\t", dmat->taxaname[i]);
-
- for(j=0;j<i;j++) {
- if(nj_args->expdist) { /* exponential notation (or not) */
- fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]);
- } else {
- fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]);
- }
- }
-
- fprintf(fp, "\n");
- }
-
-#ifdef FULL_SYMMETRIC_MATRIX
-
- /* output the number of taxa in the matrix */
- fprintf(fp, " %ld\n", dmat->size);
- for(i=0;i<dmat->size;i++) {
-
- /* output taxaname */
- fprintf(fp, "%s\t", dmat->taxaname[i]);
-
- for(j=0;j<dmat->size;j++) {
- if(i>j) {
- if(nj_args->expdist) { /* exponential notation (or not) */
- fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]);
- } else {
- fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]);
- }
- } else if(i<j) {
- if(nj_args->expdist) { /* exponential notation (or not) */
- fprintf(fp, "%e ", dmat->val[NJ_MAP(i,j,dmat->size)]);
- } else {
- fprintf(fp, "%f ", dmat->val[NJ_MAP(i,j,dmat->size)]);
- }
- } else {
- if(nj_args->expdist) { /* exponential notation (or not) */
- fprintf(fp, "%e ", 0.0);
- } else {
- fprintf(fp, "%f ", 0.0);
- }
- }
- }
-
- fprintf(fp, "\n");
- }
-
-#endif // FULL_SYMMETRIC_MATRIX
-
- /* close the file here */
- if(fp) {
- fclose(fp);
- }
-
- return;
-}
-
-
-
-
-
--- /dev/null
+/*
+ * dmat.c
+ *
+ * $Id$
+ *
+ *****************************************************************************
+ *
+ * Copyright (c) 2004, Luke Sheneman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + The names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************
+ *
+ * Distance matrix parser
+ *
+ *****************************************************************************
+ *
+ * AUTHOR:
+ *
+ * Luke Sheneman
+ * sheneman@cs.uidaho.edu
+ *
+ */
+
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <float.h>
+#include <errno.h>
+#include <string.h>
+
+
+
+#include "common.h"
+#include "clearcut.h"
+
+#include "dmat.h"
+
+
+
+
+
+/*
+ *
+ * NJ_is_alpha() - determine if character is an alphabetic character
+ *
+ * INPUT:
+ * ------
+ * c -- character to test
+ *
+ * RETURN:
+ * -------
+ * int -- 1 if character is alphabetic (A-Z || a-z)
+ * 0 if character is NOT alphabetic
+ *
+ */
+static inline
+int
+NJ_is_alpha(char c) {
+
+ if( (c >= 'A' && c <= 'Z') ||
+ (c >= 'a' && c <= 'z') ) {
+ return(1);
+ } else {
+ return(0);
+ }
+}
+
+
+
+/*
+ *
+ * NJ_is_whitespace() - determine if character is a whitespace character
+ *
+ * INPUT:
+ * ------
+ * c -- character to test
+ *
+ * RETURN:
+ * -------
+ * int -- 1 if character is whitespace (space, tab, CR, LF)
+ * 0 if character is NOT whitespace
+ *
+ */
+static inline
+int
+NJ_is_whitespace(char c) {
+
+ if( c == ' ' || /* space */
+ c == '\n' || /* newline */
+ c == '\r' || /* carriage-return */
+ c == '\v' || /* vertical tab */
+ c == '\f' || /* form feed */
+ c == '\t' ) { /* horizontal tab */
+ return(1);
+ } else {
+ return(0);
+ }
+
+}
+
+
+
+
+/*
+ *
+ * NJ_is_number() - determine if character is a number
+ *
+ * INPUT:
+ * ------
+ * c -- character to test
+ *
+ * RETURN:
+ * -------
+ * int -- 1 if character is a number (0-9)
+ * 0 if character is NOT a number
+ *
+ */
+static inline
+int
+NJ_is_number(char c) {
+ if(c >= '0' && c <= '9') {
+ return(1);
+ } else {
+ return(0);
+ }
+}
+
+
+
+/*
+ * NJ_is_distance() - check if string is a properly formatted distance value
+ *
+ */
+static inline
+int
+NJ_is_distance(char *token) {
+
+ int i;
+ char c;
+ int exponent_state;
+ int expsign_state;
+ int dpoint_state;
+
+ /* if token is NULL return failure */
+ if(!token) {
+ return(0);
+ }
+
+ exponent_state = 0;
+ expsign_state = 0;
+ dpoint_state = 0;
+
+ /* The first character must be a number, a decimal point or a sign */
+ c = token[0];
+ if(!NJ_is_number(c) &&
+ c != '.' &&
+ c != '-' &&
+ c != '+' ) {
+
+ goto BAD;
+ }
+
+ /*
+ * if the first character is not a number, and string is only one
+ * character long, then we return failure.
+ */
+ if(strlen(token) == 1) {
+ if(!NJ_is_number(c)) {
+ goto BAD;
+ }
+ }
+
+ for(i=0;i<strlen(token);i++) {
+
+ c = token[i];
+
+ /* make sure that all chars in dist string are in list of valid chars */
+ if(!NJ_is_number(c) &&
+ c != '.' &&
+ c != '-' &&
+ c != '+' &&
+ c != 'e' &&
+ c != 'E' ) {
+
+ goto BAD;
+ }
+
+ /* not the first char and we are not in exponent state but read (+,-) */
+ if(i>0 && !exponent_state) {
+ if(c == '-' ||
+ c == '+') {
+ goto BAD;
+ }
+ }
+
+ /* if we are in the exponent state, and we've already seen a sign */
+ if(exponent_state && expsign_state) {
+ if(c == '-' ||
+ c == '+') {
+ goto BAD;
+ }
+ }
+
+ /* if we are in the exponent state and we see a decimal point */
+ if(exponent_state) {
+ if(c == '.') {
+ goto BAD;
+ }
+ }
+
+ /* if we are in the exponent state and see another e or E */
+ if(exponent_state) {
+ if(c == 'e' ||
+ c == 'E') {
+ goto BAD;
+ }
+ }
+
+ /* if we are dpoint_state and see another decimal point */
+ if(dpoint_state) {
+ if(c == '.') {
+ goto BAD;
+ }
+ }
+
+
+ /* enter the exponent state if we need to */
+ if(!exponent_state) {
+ if(c == 'e' ||
+ c == 'E') {
+ exponent_state = 1;
+ }
+ }
+
+ /* enter the expsign_state if we need to */
+ if(exponent_state && !expsign_state) {
+ if(c == '-' ||
+ c == '+') {
+ expsign_state = 1;
+ }
+ }
+
+ /* if not in dpoint state and we see a dpoint */
+ if(!dpoint_state) {
+ if(c == '.') {
+ dpoint_state = 1;
+ }
+ }
+
+ }
+
+ /* the token must end in a number char */
+ if(!NJ_is_number(token[strlen(token)-1])) {
+ goto BAD;
+ }
+
+ /* token is a valid numerical distance */
+ return(1);
+
+ BAD:
+
+ /* token is invalid distance format */
+ return(0);
+}
+
+
+
+
+/*
+ * NJ_is_label() -
+ *
+ * Simply, if token is not a valid number, then it is a name
+ *
+ */
+static inline
+int
+NJ_is_label(char *token) {
+ if(NJ_is_distance(token)) {
+ return(0);
+ } else {
+ return(1);
+ }
+}
+
+
+
+/*
+ * NJ_get_token() - get a token from an input stream
+ *
+ */
+static inline
+int
+NJ_get_token(FILE *fp,
+ NJ_DIST_TOKEN *token) {
+
+ char c;
+ int index;
+
+ c = fgetc(fp);
+ if(feof(fp)) {
+ token->type = NJ_EOF_STATE;
+ return(token->type);
+ }
+
+ if(NJ_is_whitespace(c)) {
+ token->buf[0] = c;
+ token->buf[1] = '\0';
+ token->type = NJ_WS_STATE;
+
+ return NJ_WS_STATE;
+ }
+
+ index = 0;
+ while(!NJ_is_whitespace(c)) {
+
+ /* reallocate our buffer if necessary */
+ if(index >= token->bufsize) {
+ token->bufsize *= 2;
+ token->buf = (char *)realloc(token->buf, token->bufsize*sizeof(char));
+ if(!token->buf) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_get_token()\n");
+ exit(-1);
+ }
+ }
+
+ token->buf[index++] = c;
+
+ c = fgetc(fp);
+ if(feof(fp)) {
+ token->type = NJ_EOF_STATE;
+ break;
+ }
+ }
+
+ token->buf[index] = '\0';
+
+ if(token->type != NJ_EOF_STATE) {
+
+ if(NJ_is_distance(token->buf)) {
+ token->type = NJ_FLOAT_STATE;
+ } else {
+ token->type = NJ_NAME_STATE;
+ }
+
+ }
+
+ return(token->type);
+}
+
+
+
+
+
+
+/*
+ * NJ_parse_distance_matrix() -- Takes a filename and returns a distance matrix
+ *
+ *
+ * INPUT:
+ * ------
+ * nj_args -- a pointer to a structure containing the command-line arguments
+ *
+ * OUTPUT:
+ * -------
+ * <DMAT *> -- NULL (failure)
+ * -- A pointer to a populated distance matrix
+ *
+ * DESCRIPTION:
+ * ------------
+ * This function implements a simple state machine to parse a distance matrix
+ * in approximate PHYLIP format. This function auto-detects whether the
+ * distance matrix is in upper, lower, or fully-symmetric format and handles
+ * it accordingly. For full/symmetric matrices, values must be symmetric
+ * around the diagonal, which is required to be zeroes. Names and values must
+ * be separated by whitespace (space, tab, newlines, etc.). Taxon labels can
+ * include numbers, but must start with non-numerical symbols.
+ *
+ *
+ * *** UPPER FORMAT EXAMPLE ***
+ *
+ * 4
+ * seq1 0.2 0.3 0.1
+ * seq2 0.2 0.3
+ * seq3 0.1
+ * seq4
+ *
+ * *** LOWER FORMAT EXAMPLE ***
+ *
+ * 4
+ * seq1
+ * seq2 0.3
+ * seq3 0.2 0.4
+ * seq4 0.3 0.1 0.3
+ *
+ * *** SYMMETRIC (FULL) EXAMPLE ***
+ *
+ * 4
+ * seq1 0.0 0.3 0.5 0.3
+ * seq2 0.3 0.0 0.1 0.2
+ * seq3 0.5 0.1 0.0 0.9
+ * seq4 0.3 0.2 0.9 0.0
+ *
+ * Values in the distance matrix can be positive or negative, integers or
+ * real values. Values can also be parsed in exponential notation form.
+ *
+ */
+DMAT *
+NJ_parse_distance_matrix(NJ_ARGS *nj_args) {
+
+ DMAT *dmat = NULL;
+ FILE *fp = NULL;
+ NJ_DIST_TOKEN *token = NULL;
+
+ int state, dmat_type;
+ int row;
+ int fltcnt;
+ int x, y, i;
+ int numvalread;
+ int expectedvalues = -1;
+ float val;
+ int first_state = 0;
+
+
+ /* allocate our distance matrix and token structure */
+ dmat = (DMAT *)calloc(1, sizeof(DMAT));
+ token = (NJ_DIST_TOKEN *)calloc(1, sizeof(NJ_DIST_TOKEN));
+ if(token) {
+ token->bufsize = NJ_INITIAL_BUFSIZE;
+ token->buf = (char *)calloc(token->bufsize, sizeof(char));
+ }
+ if(!dmat || !token || !token->buf) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
+ goto XIT_BAD;
+ }
+
+ /* open distance matrix file here */
+ if(nj_args->stdin_flag) {
+ fp = stdin;
+ } else {
+ fp = fopen(nj_args->infilename, "r");
+ if(!fp) {
+ fprintf(stderr, "Clearcut: Could not open distance matrix: %s\n", nj_args->infilename);
+ perror("Clearcut");
+ goto XIT_BAD;
+ }
+ }
+
+ /* get the number of taxa in this file */
+ fscanf(fp, "%ld\n", &dmat->ntaxa);
+ if(dmat->ntaxa < 2) {
+ fprintf(stderr, "Clearcut: Invalid number of taxa in distance matrix\n");
+
+ goto XIT_BAD;
+ }
+
+ /* set our initial working size according to the # of taxa */
+ dmat->size = dmat->ntaxa;
+
+ /* allocate space for the distance matrix values here */
+ dmat->val =
+ (float *)calloc(NJ_NCELLS(dmat->ntaxa), sizeof(float));
+ if(!dmat->val) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
+ goto XIT_BAD;
+ }
+
+ /* taxa names */
+ dmat->taxaname = (char **)calloc(dmat->ntaxa, sizeof(char *));
+ if(!dmat->taxaname) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
+ goto XIT_BAD;
+ }
+
+ /* set the initial state of our state machine */
+ dmat_type = NJ_PARSE_UNKNOWN;
+ row = -1;
+ fltcnt = 0;
+ numvalread = 0;
+
+
+ /* read the input one character at a time to drive simple state machine */
+ state = NJ_get_token(fp, token);
+ while(state != NJ_EOF_STATE) {
+
+ switch(state) {
+
+ case NJ_NAME_STATE:
+
+ if(first_state == 0) {
+ first_state = 1;
+ }
+
+ row++;
+
+ if(row > 0 && dmat_type == NJ_PARSE_UNKNOWN) {
+
+ if(fltcnt == dmat->ntaxa) {
+
+ dmat_type = NJ_PARSE_SYMMETRIC;
+ expectedvalues = dmat->ntaxa * dmat->ntaxa;
+
+ } else if (fltcnt == dmat->ntaxa-1) {
+
+ dmat_type = NJ_PARSE_UPPER;
+ expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2;
+
+ /* shift everything in first row by one char */
+ for(i=dmat->ntaxa-2;i>=0;i--) {
+ dmat->val[i+1] = dmat->val[i];
+ }
+
+ } else if (fltcnt == 0) {
+
+ dmat_type = NJ_PARSE_LOWER;
+ expectedvalues = ((dmat->ntaxa) * (dmat->ntaxa-1)) / 2;
+
+ } else {
+ goto XIT_BAD;
+ }
+ }
+
+ if(row >= dmat->ntaxa) {
+ goto XIT_BAD;
+ }
+
+ /* allocate space for this taxon label */
+ dmat->taxaname[row] = (char *)calloc(strlen(token->buf)+1, sizeof(char));
+ if(!dmat->taxaname[row]) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
+ goto XIT_BAD;
+ }
+
+ strcpy(dmat->taxaname[row], token->buf);
+
+ fltcnt = 0;
+
+ break;
+
+
+ case NJ_FLOAT_STATE:
+
+ if(first_state == 0) {
+ goto XIT_BAD;
+ }
+
+ val = atof(token->buf);
+ if(errno) {
+ fprintf(stderr, "Clearcut: Distance value out-of-range.\n");
+ goto XIT_BAD;
+ }
+
+ x = row;
+ y = fltcnt;
+
+ switch(dmat_type) {
+
+ case NJ_PARSE_UNKNOWN:
+
+ dmat->val[NJ_MAP(x, y, dmat->size)] = val;
+
+ break;
+
+ case NJ_PARSE_SYMMETRIC:
+
+ if(fltcnt >= dmat->ntaxa) {
+ fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
+ goto XIT_BAD;
+ }
+
+ if(x < y) {
+ dmat->val[NJ_MAP(x, y, dmat->size)] = val;
+ } else if(x > y) {
+ if(!NJ_FLT_EQ(val, dmat->val[NJ_MAP(y, x, dmat->size)])) {
+ fprintf(stderr, "Clearcut: Full matrices must be symmetric.\n");
+ goto XIT_BAD;
+ }
+ } else {
+ if(!NJ_FLT_EQ(val, 0.0)) {
+ fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n");
+ goto XIT_BAD;
+
+ }
+ }
+
+ break;
+
+ case NJ_PARSE_UPPER:
+
+ if(fltcnt > dmat->ntaxa-row) {
+ fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
+ goto XIT_BAD;
+ }
+
+ dmat->val[NJ_MAP(x, x+y+1, dmat->size)] = val;
+
+ break;
+
+ case NJ_PARSE_LOWER:
+
+ if(fltcnt > row-1) {
+ fprintf(stderr, "Clearcut: Incorrect number of distance values on row.\n");
+ goto XIT_BAD;
+ }
+
+ dmat->val[NJ_MAP(y, x, dmat->size)] = val;
+
+ break;
+
+ default:
+
+ goto XIT_BAD;
+
+ break;
+ }
+
+ fltcnt++;
+ numvalread++;
+
+ break;
+
+ case NJ_WS_STATE:
+
+ break;
+
+ case NJ_EOF_STATE:
+
+ if(first_state == 0) {
+ goto XIT_BAD;
+ }
+
+ break;
+
+ default:
+
+ fprintf(stderr, "Clearcut: Unknown state in distance matrix parser.\n");
+ break;
+
+ }
+
+ /* get next token from stream */
+ state = NJ_get_token(fp, token);
+ }
+
+
+ /*
+ * At the end, if we have not read the number of values that we predicted
+ * we would need, then there was a problem and we need to punt.
+ */
+ if(numvalread != expectedvalues) {
+ fprintf(stderr, "Clearcut: Incorrect number of values in the distance matrix.\n");
+ goto XIT_BAD;
+ }
+
+ /* special check to make sure first value read is 0.0 */
+ if(dmat_type == NJ_PARSE_SYMMETRIC) {
+ if(!NJ_FLT_EQ(dmat->val[NJ_MAP(0, 0, dmat->size)], 0.0)) {
+ fprintf(stderr, "Clearcut: Values along the diagonal in a symmetric matrix must be zero.\n");
+ goto XIT_BAD;
+ }
+ }
+
+
+ /* now lets allocate space for the r and r2 columns */
+ dmat->r = (float *)calloc(dmat->ntaxa, sizeof(float));
+ dmat->r2 = (float *)calloc(dmat->ntaxa, sizeof(float));
+ if(!dmat->r || !dmat->r2) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_parse_distance_matrix()\n");
+ goto XIT_BAD;
+ }
+
+ /* track some memory addresses */
+ dmat->rhandle = dmat->r;
+ dmat->r2handle = dmat->r2;
+ dmat->valhandle = dmat->val;
+
+ /* close matrix file here */
+ if(!nj_args->stdin_flag) {
+ fclose(fp);
+ }
+
+ if(token) {
+ if(token->buf) {
+ free(token->buf);
+ }
+ free(token);
+ }
+
+ return(dmat);
+
+
+
+ /* clean up our partial progress */
+ XIT_BAD:
+
+ if(fp) {
+ fprintf(stderr, "Clearcut: Syntax error in distance matrix at offset %ld.\n", ftell(fp));
+ }
+
+ /* close matrix file here */
+ if(!nj_args->stdin_flag) {
+ if(fp) {
+ fclose(fp);
+ }
+ }
+
+ /* if we have a valid dmat (partial or complete), we need to free it */
+ if(dmat) {
+ NJ_free_dmat(dmat);
+ }
+
+ if(token) {
+ if(token->buf) {
+ free(token->buf);
+ }
+ free(token);
+ }
+
+ return(NULL);
+}
+
+
+
+
+
+
+
+/*
+ * NJ_output_matrix() - Output a distance matrix to the specified file
+ *
+ *
+ * INPUTS:
+ * -------
+ * nj_args -- a pointer to a data structure holding the command-line args
+ * dmat -- a pointer to a distance matrix
+ *
+ *
+ * RETURNS:
+ * --------
+ * NOTHING
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ * If the appropriate flag was specified in the command-line, this function
+ * now outputs the parsed or computed distance matrix to a file. This
+ * can be useful if generating a distance matrix was the primary goal of
+ * running the program, or if one wanted to debug and/or verify the
+ * correctness of the program.
+ *
+ * Currently this function outputs full/symmetric matrices only.
+ *
+ */
+void
+NJ_output_matrix(NJ_ARGS *nj_args,
+ DMAT *dmat) {
+
+ FILE *fp = NULL;
+ long int i, j;
+
+
+
+ /* if we haven't specieid matrixout, return immediately */
+ if(!nj_args->matrixout) {
+ return;
+ }
+
+ /* open the specified matrix file for writing */
+ fp = fopen(nj_args->matrixout, "w");
+ if(!fp) {
+ fprintf(stderr, "Clearcut: Could not open matrix file %s for output.\n", nj_args->matrixout);
+ return;
+ }
+
+ /* output the number of taxa in the matrix */
+ fprintf(fp, " %ld\n", dmat->size);
+
+ fprintf(fp, "%s\n", dmat->taxaname[0]); // print the first taxon name outside of the main loop
+
+ for(i=1;i<dmat->size;i++) {
+
+ /* output taxaname */
+ fprintf(fp, "%s\t", dmat->taxaname[i]);
+
+ for(j=0;j<i;j++) {
+ if(nj_args->expdist) { /* exponential notation (or not) */
+ fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]);
+ } else {
+ fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]);
+ }
+ }
+
+ fprintf(fp, "\n");
+ }
+
+#ifdef FULL_SYMMETRIC_MATRIX
+
+ /* output the number of taxa in the matrix */
+ fprintf(fp, " %ld\n", dmat->size);
+ for(i=0;i<dmat->size;i++) {
+
+ /* output taxaname */
+ fprintf(fp, "%s\t", dmat->taxaname[i]);
+
+ for(j=0;j<dmat->size;j++) {
+ if(i>j) {
+ if(nj_args->expdist) { /* exponential notation (or not) */
+ fprintf(fp, "%e ", dmat->val[NJ_MAP(j,i,dmat->size)]);
+ } else {
+ fprintf(fp, "%f ", dmat->val[NJ_MAP(j,i,dmat->size)]);
+ }
+ } else if(i<j) {
+ if(nj_args->expdist) { /* exponential notation (or not) */
+ fprintf(fp, "%e ", dmat->val[NJ_MAP(i,j,dmat->size)]);
+ } else {
+ fprintf(fp, "%f ", dmat->val[NJ_MAP(i,j,dmat->size)]);
+ }
+ } else {
+ if(nj_args->expdist) { /* exponential notation (or not) */
+ fprintf(fp, "%e ", 0.0);
+ } else {
+ fprintf(fp, "%f ", 0.0);
+ }
+ }
+ }
+
+ fprintf(fp, "\n");
+ }
+
+#endif // FULL_SYMMETRIC_MATRIX
+
+ /* close the file here */
+ if(fp) {
+ fclose(fp);
+ }
+
+ return;
+}
+
+
+
+
+
+++ /dev/null
-/*
- * fasta.c
- *
- * $Id$
- *
- *****************************************************************************
- *
- * Copyright (c) 2004, Luke Sheneman
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * + Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * + Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * + The names of its contributors may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- *
- *****************************************************************************
- *
- * Functions for parsing FASTA formatted alignment files
- *
- *****************************************************************************
- *
- * AUTHOR:
- *
- * Luke Sheneman
- * sheneman@cs.uidaho.edu
- *
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#include "clearcut.h"
-#include "common.h"
-#include "fasta.h"
-
-
-#define NJ_NUM_DNA_AMBIGUITY_SYMS 14
-static const char NJ_dna_ambiguity_syms[NJ_NUM_DNA_AMBIGUITY_SYMS] =
-{
- 'M', 'R', 'W', 'S', 'Y', 'K',
- 'V', 'H', 'D', 'B', 'X', 'N',
- '-', '.'
-};
-
-
-#define NJ_NUM_PROTEIN_AMBIGUITY_SYMS 6
-static const char NJ_protein_ambiguity_syms[NJ_NUM_PROTEIN_AMBIGUITY_SYMS] =
-{
- 'X', 'B', 'Z', '*', '-', '.'
-};
-
-#define NJ_NUM_DNA_SYMS 5
-static const char NJ_dna_syms[NJ_NUM_DNA_SYMS] =
-{
- 'A', 'G', 'C', 'T', 'U'
-};
-
-
-#define NJ_NUM_PROTEIN_SYMS 20
-static const char NJ_protein_syms[NJ_NUM_PROTEIN_SYMS] =
-{
- 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
- 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'
-};
-
-
-
-
-
-/*
- * NJ_is_whitespace() - Check to see if character is whitespace
- *
- * INPUTS:
- * -------
- * c -- character to check
- *
- * RETURNS:
- * --------
- * int -- 0 if not whitespace
- * 1 if whitespace
- */
-static inline
-int
-NJ_is_whitespace(char c) {
- if( c == ' ' || /* space */
- c == '\n' || /* newline */
- c == '\r' || /* carriage-return */
- c == '\v' || /* vertical tab */
- c == '\f' || /* form feed */
- c == '\t' ) { /* horizontal tab */
- return(1);
- } else {
- return(0);
- }
-}
-
-
-
-
-
-/*
- * NJ_is_dna() -
- *
- * Determines if the given symbol is DNA
- *
- * RETURNS: 1 if DNA
- * 0 if not DNA
- *
- */
-static inline
-int
-NJ_is_dna(char c) {
-
- int i;
- char up_c;
-
- up_c = toupper(c);
-
- for(i=0;i<NJ_NUM_DNA_SYMS;i++) {
- if(up_c == NJ_dna_syms[i]) {
- return(1);
- }
- }
-
- return(0);
-}
-
-
-
-
-
-/*
- * NJ_is_dna_ambiguity() -
- *
- * Determines if the given symbol is a
- * DNA ambiguity code
- *
- * RETURNS: 1 if DNA Ambiguity Code
- * 0 if not DNA Ambiguity Code
- *
- */
-static inline
-int
-NJ_is_dna_ambiguity(char c) {
-
- int i;
- char up_c;
-
- up_c = toupper(c);
-
- for(i=0;i<NJ_NUM_DNA_AMBIGUITY_SYMS;i++) {
- if(up_c == NJ_dna_ambiguity_syms[i]) {
- return(1);
- }
- }
-
- return(0);
-}
-
-
-
-/*
- * NJ_is_protein() -
- *
- * Determines if supplied symbol is amino acid symbol
- *
- * RETURNS: 1 if amino acid
- * 0 if not amino acid
- *
- */
-static inline
-int
-NJ_is_protein(char c) {
-
- int i;
- char up_c;
-
- up_c = toupper(c);
-
- for(i=0;i<NJ_NUM_PROTEIN_SYMS;i++) {
- if(up_c == NJ_protein_syms[i]) {
- return(1);
- }
- }
-
- return(0);
-}
-
-
-
-
-/*
- * NJ_is_protein_ambiguity() -
- *
- * Determines if supplied symbol is amino acid ambiguity symbol
- *
- * RETURNS: 1 if amino acid ambiguity symbol
- * 0 if not amino acid ambiguity symbol
- *
- */
-static inline
-int
-NJ_is_protein_ambiguity(char c) {
-
- int i;
- char up_c;
-
- up_c = toupper(c);
-
- for(i=0;i<NJ_NUM_PROTEIN_AMBIGUITY_SYMS;i++) {
- if(up_c == NJ_protein_ambiguity_syms[i]) {
- return(1);
- }
- }
-
- return(0);
-}
-
-
-
-
-
-
-/*
- * NJ_read_fasta() - A function for inputing FASTA sequences into an alignment
- * data structure
- *
- *
- * INPUTS:
- * -------
- * nj_args -- A pointer to a data structure containing command-line args
- *
- * RETURNS:
- * --------
- * NJ_alignment * -- A pointer to a multiple sequence alignment
- *
- * DESCRIPTION:
- * ------------
- *
- *
- * This function implements a state machine parser for parsing FASTA-formatted
- * multiple sequence alignment files.
- *
- * Example Input:
- *
- * > sequence1
- * ATAGATATAGATTAGAATAT----TATAGATAT----ATATAT-TTT-
- * > sequence2
- * --ATAGATA---ATATATATATTTT--GTCTCATAGT---ATATGCTT
- * > sequence3
- * TTATAGATA---ATATATATATTTTAAGTCTCATAGT-A-ATATGC--
- *
- * This function will parse alignments for DNA or protein, and will do
- * so mindful of ambiguity codes for these kinds of sequences. All
- * ambiguity codes are ignored by this program for the purposes of
- * computing a distance matrix from a multiple alignment. By design,
- * this program does not auto-detect DNA vs. Protein, and requires that
- * the user explictly specify that on the command-line.
- *
- * Gaps can be represented either with the '-' or '.' characters.
- *
- * Newlines and other whitespace are allowed to be interspersed
- * throughout the sequences.
- *
- * Taxon labels are required to be unique, and they must start with
- * an alphabetic character (not a number, etc.). The parser will read
- * the first token after the > character in the description line up until the
- * first whitespace and use that for the taxon label.
- *
- * For example, in the line "> taxon1 is homo sapien", the taxon label will be
- * "taxon1"
- *
- */
-NJ_alignment *
-NJ_read_fasta(NJ_ARGS *nj_args) {
-
- FILE *fp = NULL;
- char *buf = NULL;
- char *ptr = NULL;
- NJ_alignment *alignment = NULL;
-
- char c;
- int state;
- long int index, x, seq;
- long int i;
- long int bufsize, nseqs = NJ_INITIAL_NSEQS;
- int first_sequence_flag;
-
-
-
-
- /*
- * In this function, we implement a FASTA alignment parser which
- * reads in an alignment character-by-character, maintaining state
- * information which guides the parser.
- *
- * The program reads either DNA or Protein alignments. All title lines
- * and sequences can be arbitrarily long. Gaps can be represented by
- * "-" or "." characters.
- *
- * Ambiguity codes are also handled.
- *
- */
-
- /*
- * We can't handle reading fasta input unless the user explicity
- * specifies the input type...just to be sure.
- */
- if( (!nj_args->dna_flag && !nj_args->protein_flag) ||
- (nj_args->dna_flag && nj_args->protein_flag) ) {
- fprintf(stderr, "Clearcut: Explicitly specify protein or DNA\n");
- goto XIT_BAD;
- }
-
- /* open specified fasta file here */
- if(nj_args->stdin_flag) {
- fp = stdin;
- } else {
- fp = fopen(nj_args->infilename, "r");
- if(!fp) {
- fprintf(stderr, "Clearcut: Failed to open input FASTA file: %s\n", nj_args->infilename);
- perror("Clearcut");
- goto XIT_BAD;
- }
- }
-
- /* allocate the initial buffer */
- bufsize = NJ_INITIAL_BUFFER_SIZE;
- buf = (char *)calloc(bufsize, sizeof(char));
-
- /* allocate the alignment container here */
- alignment = (NJ_alignment *)calloc(1, sizeof(NJ_alignment));
-
- /* allocate initial title array */
- // printf("allocating initial title array\n");
- alignment->titles = (char **)calloc(NJ_INITIAL_NSEQS, sizeof(char *));
-
- /* make sure that we successfully allocated memory */
- if(!buf || !alignment || !alignment->titles) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
-
- /* a flag */
- first_sequence_flag = 1;
-
- index = 0; /* tracks the position in buffer */
- x = 0; /* tracks the position on sequence */
- seq = 0; /* tracks the active sequence */
-
- /* intitial state of state machine */
- state = NJ_FASTA_MODE_UNKNOWN;
-
- while(1) {
-
- /* get the next character */
- c = fgetc(fp);
- if(feof(fp)) {
-
- if(state == NJ_FASTA_MODE_SEQUENCE) {
- buf[index+1] = '\0';
-
- /* copy buf to alignment */
- for(i=1;i<=alignment->length;i++) {
- alignment->data[seq*alignment->length+i-1] = buf[i];
- }
- }
-
- break;
- }
-
- /* make sure our dynamic buffer is big enough */
- if(index >= bufsize) {
- bufsize *= 2;
- buf = (char *)realloc(buf, bufsize);
- if(!buf) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
- }
-
- switch(state) {
-
- case NJ_FASTA_MODE_UNKNOWN:
-
- if(!NJ_is_whitespace(c)) {
- if(c == '>') {
- state = NJ_FASTA_MODE_TITLE;
- buf[0] = '>';
- } else {
- goto XIT_BAD;
- }
- }
-
- break;
-
- case NJ_FASTA_MODE_TITLE:
-
- if( c == '\n' ||
- c == '\r' ) {
-
- buf[index] = '\0';
- state = NJ_FASTA_MODE_SEQUENCE;
- index = 0;
- x = -1;
-
- /* make sure we've allocated enough space for titles and sequences */
- if(seq == nseqs) {
-
- // printf("realloc(). seq = %d, nseqs = %d\n", seq, nseqs);
-
- nseqs *= 2;
-
- alignment->titles = (char **)realloc(alignment->titles, nseqs*sizeof(char *));
- if(!alignment->titles) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
-
- alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char));
- if(!alignment->data) {
- fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
- }
-
- // printf("Allocating %d bytes for title %d: %s\n", (int)strlen(buf), (int)seq, buf);
-
- alignment->titles[seq] = (char *)calloc(strlen(buf), sizeof(char));
- if(!alignment->titles[seq]) {
- fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
-
- /* lets forward to the first non-space (space/tab) character after the '>' */
-
- if(first_sequence_flag) {
- ptr = buf;
- } else {
- ptr = &buf[1];
- }
- while(*ptr == '\t' || *ptr == ' ') {
- ptr++;
- }
- sscanf(ptr, "%s", alignment->titles[seq]); /* get the first word and use as the title */
-
- alignment->nseq++;
- }
-
- buf[index++] = c;
-
- break;
-
-
- case NJ_FASTA_MODE_SEQUENCE:
-
- if(c == '>') {
-
- if(first_sequence_flag) {
- first_sequence_flag = 0;
-
- /* allocate our alignment data section here */
- alignment->length = index-1;
-
- nseqs = NJ_INITIAL_NSEQS;
- alignment->data = (char *)calloc(alignment->length*nseqs, sizeof(char));
- if(!alignment->data) {
- fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
- }
-
- if(!first_sequence_flag) {
- if(index-1 < alignment->length) {
- fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
- goto XIT_BAD;
- }
- }
-
- /* re-allocate if necessary */
- /*
- if(seq >= nseqs) {
- nseqs *= 2;
- alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char));
- if(!alignment->data) {
- fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
- goto XIT_BAD;
- }
- }
- */
-
- /* copy buf to alignment */
- for(i=1;i<=alignment->length;i++) {
- alignment->data[seq*alignment->length+i-1] = buf[i];
- }
-
- state = NJ_FASTA_MODE_TITLE;
- index = 1;
- x = 1;
-
- buf[0] = c;
-
- seq++;
-
- } else {
-
- if(NJ_is_whitespace(c)) {
- break;
- }
-
- if(!first_sequence_flag) {
- if(index-1 >= alignment->length) {
- fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
- goto XIT_BAD;
- }
- }
-
-
- /*
- * Here we check to make sure that the symbol read is appropriate
- * for the type of data the user specified. (dna or protein).
- * We also handle ambiguity codes by converting them to a specific
- * assigned ambiguity code character. Ambiguity codes are ignored
- * when computing distances
- */
-
- if(nj_args->dna_flag) {
- if(NJ_is_dna(c)) {
- buf[index++] = toupper(c);
- } else {
- if(NJ_is_dna_ambiguity(c)) {
- buf[index++] = NJ_AMBIGUITY_CHAR;
- } else {
- fprintf(stderr, "Clearcut: Unknown symbol '%c' in nucleotide sequence %ld.\n", c, seq);
- goto XIT_BAD;
- }
- }
- } else if(nj_args->protein_flag) {
- if(NJ_is_protein(c)) {
- buf[index++] = toupper(c);
- } else {
- if(NJ_is_protein_ambiguity(c)) {
- buf[index++] = NJ_AMBIGUITY_CHAR;
- } else {
- fprintf(stderr, "Clearcut: Unknown symbol '%c' in protein sequence %ld.\n", c, seq);
- goto XIT_BAD;
- }
- }
- }
-
- }
-
- break;
-
- default:
- goto XIT_BAD;
-
- break;
-
- }
- }
-
- if(index-1 != alignment->length) {
- fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
- goto XIT_BAD;
- }
-
- /* check for duplicate taxon labels */
- if(!NJ_taxaname_unique(alignment)) {
- goto XIT_BAD;
- }
-
- return(alignment);
-
-
- XIT_BAD:
-
- if(fp) {
- fprintf(stderr, "Clearcut: Fatal error parsing FASTA file at file offset %ld.\n", ftell(fp));
- }
-
- if(buf) {
- free(buf);
- }
-
- NJ_free_alignment(alignment);
-
- return(NULL);
-}
-
-
-
-
-/*
- * NJ_print_alignment() - Print multiple sequence alignment (for debugging)
- *
- * INPUTS:
- * -------
- * alignment -- A pointer to the alignment
- *
- * RETURNS:
- * --------
- * NONE
- *
- */
-void
-NJ_print_alignment(NJ_alignment *alignment) {
-
- long int i, j;
-
- printf("nseq = %ld, length = %ld\n", alignment->nseq, alignment->length);
-
- for(i=0;i<alignment->nseq;i++) {
-
- printf("> %s\n", alignment->titles[i]);
-
- for(j=0;j<alignment->length;j++) {
- printf("%c", alignment->data[i*alignment->length+j]);
- }
-
- printf("\n");
- }
-
- return;
-}
-
-
-
-
-
-
-
-/*
- *
- * NJ_free_alignment() - Free all of the memory allocated for the
- * multiple sequence alignment
- *
- * INPUTS:
- * -------
- * alignment -- A pointer to the multiple sequence alignment
- *
- * RETURNS:
- * --------
- * NONE
- *
- */
-void
-NJ_free_alignment(NJ_alignment *alignment) {
-
- long int i;
-
- if(alignment) {
-
- /* free the allocated titles */
- if(alignment->titles) {
- for(i=0;i<alignment->nseq;i++) {
-
- if(alignment->titles[i]) {
- free(alignment->titles[i]);
- }
- }
-
- free(alignment->titles);
- }
-
- /* free the alignment data */
- if(alignment->data) {
- free(alignment->data);
- }
-
- /* free the alignment itself */
- free(alignment);
- }
-
- return;
-}
-
-
-
-
-/*
- * NJ_taxaname_unique() - Check to see if taxanames are unique in alignment
- *
- * INPUTS:
- * -------
- * alignment -- a pointer to a multiple sequence alignment
- *
- * OUTPUTS:
- * --------
- * int -- 0 if all taxanames in alignment are unique
- * 1 if all taxanames in alignment are NOT unique
- *
- *
- * DESCRIPTION:
- * ------------
- *
- * Check to see if the taxanames in the alignment are unique. It
- * will be impossible to make sense of the final tree if the taxon
- * labels are not unqiue.
- *
- */
-int
-NJ_taxaname_unique(NJ_alignment *alignment) {
-
- long int i, j;
-
- for(i=0;i<alignment->nseq;i++) {
- for(j=i+1;j<alignment->nseq;j++) {
- if(!strcmp(alignment->titles[i],
- alignment->titles[j])) {
- fprintf(stderr, "Clearcut: Taxa %ld and %ld (%s) do not have unique taxon labels.\n",
- i, j, alignment->titles[i]);
- return(0);
- }
- }
- }
-
- return(1);
-}
-
-
-void
-NJ_print_titles(NJ_alignment *alignment) {
-
- int i;
-
- for(i=0;i<alignment->nseq;i++) {
- printf("%d: %s\n", i, alignment->titles[i]);
- }
-
- return;
-}
--- /dev/null
+/*
+ * fasta.c
+ *
+ * $Id$
+ *
+ *****************************************************************************
+ *
+ * Copyright (c) 2004, Luke Sheneman
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * + Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * + The names of its contributors may not be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ *****************************************************************************
+ *
+ * Functions for parsing FASTA formatted alignment files
+ *
+ *****************************************************************************
+ *
+ * AUTHOR:
+ *
+ * Luke Sheneman
+ * sheneman@cs.uidaho.edu
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "clearcut.h"
+#include "common.h"
+#include "fasta.h"
+
+
+#define NJ_NUM_DNA_AMBIGUITY_SYMS 14
+static const char NJ_dna_ambiguity_syms[NJ_NUM_DNA_AMBIGUITY_SYMS] =
+{
+ 'M', 'R', 'W', 'S', 'Y', 'K',
+ 'V', 'H', 'D', 'B', 'X', 'N',
+ '-', '.'
+};
+
+
+#define NJ_NUM_PROTEIN_AMBIGUITY_SYMS 6
+static const char NJ_protein_ambiguity_syms[NJ_NUM_PROTEIN_AMBIGUITY_SYMS] =
+{
+ 'X', 'B', 'Z', '*', '-', '.'
+};
+
+#define NJ_NUM_DNA_SYMS 5
+static const char NJ_dna_syms[NJ_NUM_DNA_SYMS] =
+{
+ 'A', 'G', 'C', 'T', 'U'
+};
+
+
+#define NJ_NUM_PROTEIN_SYMS 20
+static const char NJ_protein_syms[NJ_NUM_PROTEIN_SYMS] =
+{
+ 'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I',
+ 'L', 'K', 'M', 'F', 'P', 'S', 'T', 'W', 'Y', 'V'
+};
+
+
+
+
+
+/*
+ * NJ_is_whitespace() - Check to see if character is whitespace
+ *
+ * INPUTS:
+ * -------
+ * c -- character to check
+ *
+ * RETURNS:
+ * --------
+ * int -- 0 if not whitespace
+ * 1 if whitespace
+ */
+static inline
+int
+NJ_is_whitespace(char c) {
+ if( c == ' ' || /* space */
+ c == '\n' || /* newline */
+ c == '\r' || /* carriage-return */
+ c == '\v' || /* vertical tab */
+ c == '\f' || /* form feed */
+ c == '\t' ) { /* horizontal tab */
+ return(1);
+ } else {
+ return(0);
+ }
+}
+
+
+
+
+
+/*
+ * NJ_is_dna() -
+ *
+ * Determines if the given symbol is DNA
+ *
+ * RETURNS: 1 if DNA
+ * 0 if not DNA
+ *
+ */
+static inline
+int
+NJ_is_dna(char c) {
+
+ int i;
+ char up_c;
+
+ up_c = toupper(c);
+
+ for(i=0;i<NJ_NUM_DNA_SYMS;i++) {
+ if(up_c == NJ_dna_syms[i]) {
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+
+
+
+
+/*
+ * NJ_is_dna_ambiguity() -
+ *
+ * Determines if the given symbol is a
+ * DNA ambiguity code
+ *
+ * RETURNS: 1 if DNA Ambiguity Code
+ * 0 if not DNA Ambiguity Code
+ *
+ */
+static inline
+int
+NJ_is_dna_ambiguity(char c) {
+
+ int i;
+ char up_c;
+
+ up_c = toupper(c);
+
+ for(i=0;i<NJ_NUM_DNA_AMBIGUITY_SYMS;i++) {
+ if(up_c == NJ_dna_ambiguity_syms[i]) {
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+
+
+/*
+ * NJ_is_protein() -
+ *
+ * Determines if supplied symbol is amino acid symbol
+ *
+ * RETURNS: 1 if amino acid
+ * 0 if not amino acid
+ *
+ */
+static inline
+int
+NJ_is_protein(char c) {
+
+ int i;
+ char up_c;
+
+ up_c = toupper(c);
+
+ for(i=0;i<NJ_NUM_PROTEIN_SYMS;i++) {
+ if(up_c == NJ_protein_syms[i]) {
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+
+
+
+/*
+ * NJ_is_protein_ambiguity() -
+ *
+ * Determines if supplied symbol is amino acid ambiguity symbol
+ *
+ * RETURNS: 1 if amino acid ambiguity symbol
+ * 0 if not amino acid ambiguity symbol
+ *
+ */
+static inline
+int
+NJ_is_protein_ambiguity(char c) {
+
+ int i;
+ char up_c;
+
+ up_c = toupper(c);
+
+ for(i=0;i<NJ_NUM_PROTEIN_AMBIGUITY_SYMS;i++) {
+ if(up_c == NJ_protein_ambiguity_syms[i]) {
+ return(1);
+ }
+ }
+
+ return(0);
+}
+
+
+
+
+
+
+/*
+ * NJ_read_fasta() - A function for inputing FASTA sequences into an alignment
+ * data structure
+ *
+ *
+ * INPUTS:
+ * -------
+ * nj_args -- A pointer to a data structure containing command-line args
+ *
+ * RETURNS:
+ * --------
+ * NJ_alignment * -- A pointer to a multiple sequence alignment
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ *
+ * This function implements a state machine parser for parsing FASTA-formatted
+ * multiple sequence alignment files.
+ *
+ * Example Input:
+ *
+ * > sequence1
+ * ATAGATATAGATTAGAATAT----TATAGATAT----ATATAT-TTT-
+ * > sequence2
+ * --ATAGATA---ATATATATATTTT--GTCTCATAGT---ATATGCTT
+ * > sequence3
+ * TTATAGATA---ATATATATATTTTAAGTCTCATAGT-A-ATATGC--
+ *
+ * This function will parse alignments for DNA or protein, and will do
+ * so mindful of ambiguity codes for these kinds of sequences. All
+ * ambiguity codes are ignored by this program for the purposes of
+ * computing a distance matrix from a multiple alignment. By design,
+ * this program does not auto-detect DNA vs. Protein, and requires that
+ * the user explictly specify that on the command-line.
+ *
+ * Gaps can be represented either with the '-' or '.' characters.
+ *
+ * Newlines and other whitespace are allowed to be interspersed
+ * throughout the sequences.
+ *
+ * Taxon labels are required to be unique, and they must start with
+ * an alphabetic character (not a number, etc.). The parser will read
+ * the first token after the > character in the description line up until the
+ * first whitespace and use that for the taxon label.
+ *
+ * For example, in the line "> taxon1 is homo sapien", the taxon label will be
+ * "taxon1"
+ *
+ */
+NJ_alignment *
+NJ_read_fasta(NJ_ARGS *nj_args) {
+
+ FILE *fp = NULL;
+ char *buf = NULL;
+ char *ptr = NULL;
+ NJ_alignment *alignment = NULL;
+
+ char c;
+ int state;
+ long int index, x, seq;
+ long int i;
+ long int bufsize, nseqs = NJ_INITIAL_NSEQS;
+ int first_sequence_flag;
+
+
+
+
+ /*
+ * In this function, we implement a FASTA alignment parser which
+ * reads in an alignment character-by-character, maintaining state
+ * information which guides the parser.
+ *
+ * The program reads either DNA or Protein alignments. All title lines
+ * and sequences can be arbitrarily long. Gaps can be represented by
+ * "-" or "." characters.
+ *
+ * Ambiguity codes are also handled.
+ *
+ */
+
+ /*
+ * We can't handle reading fasta input unless the user explicity
+ * specifies the input type...just to be sure.
+ */
+ if( (!nj_args->dna_flag && !nj_args->protein_flag) ||
+ (nj_args->dna_flag && nj_args->protein_flag) ) {
+ fprintf(stderr, "Clearcut: Explicitly specify protein or DNA\n");
+ goto XIT_BAD;
+ }
+
+ /* open specified fasta file here */
+ if(nj_args->stdin_flag) {
+ fp = stdin;
+ } else {
+ fp = fopen(nj_args->infilename, "r");
+ if(!fp) {
+ fprintf(stderr, "Clearcut: Failed to open input FASTA file: %s\n", nj_args->infilename);
+ perror("Clearcut");
+ goto XIT_BAD;
+ }
+ }
+
+ /* allocate the initial buffer */
+ bufsize = NJ_INITIAL_BUFFER_SIZE;
+ buf = (char *)calloc(bufsize, sizeof(char));
+
+ /* allocate the alignment container here */
+ alignment = (NJ_alignment *)calloc(1, sizeof(NJ_alignment));
+
+ /* allocate initial title array */
+ // printf("allocating initial title array\n");
+ alignment->titles = (char **)calloc(NJ_INITIAL_NSEQS, sizeof(char *));
+
+ /* make sure that we successfully allocated memory */
+ if(!buf || !alignment || !alignment->titles) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+
+ /* a flag */
+ first_sequence_flag = 1;
+
+ index = 0; /* tracks the position in buffer */
+ x = 0; /* tracks the position on sequence */
+ seq = 0; /* tracks the active sequence */
+
+ /* intitial state of state machine */
+ state = NJ_FASTA_MODE_UNKNOWN;
+
+ while(1) {
+
+ /* get the next character */
+ c = fgetc(fp);
+ if(feof(fp)) {
+
+ if(state == NJ_FASTA_MODE_SEQUENCE) {
+ buf[index+1] = '\0';
+
+ /* copy buf to alignment */
+ for(i=1;i<=alignment->length;i++) {
+ alignment->data[seq*alignment->length+i-1] = buf[i];
+ }
+ }
+
+ break;
+ }
+
+ /* make sure our dynamic buffer is big enough */
+ if(index >= bufsize) {
+ bufsize *= 2;
+ buf = (char *)realloc(buf, bufsize);
+ if(!buf) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+ }
+
+ switch(state) {
+
+ case NJ_FASTA_MODE_UNKNOWN:
+
+ if(!NJ_is_whitespace(c)) {
+ if(c == '>') {
+ state = NJ_FASTA_MODE_TITLE;
+ buf[0] = '>';
+ } else {
+ goto XIT_BAD;
+ }
+ }
+
+ break;
+
+ case NJ_FASTA_MODE_TITLE:
+
+ if( c == '\n' ||
+ c == '\r' ) {
+
+ buf[index] = '\0';
+ state = NJ_FASTA_MODE_SEQUENCE;
+ index = 0;
+ x = -1;
+
+ /* make sure we've allocated enough space for titles and sequences */
+ if(seq == nseqs) {
+
+ // printf("realloc(). seq = %d, nseqs = %d\n", seq, nseqs);
+
+ nseqs *= 2;
+
+ alignment->titles = (char **)realloc(alignment->titles, nseqs*sizeof(char *));
+ if(!alignment->titles) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+
+ alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char));
+ if(!alignment->data) {
+ fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+ }
+
+ // printf("Allocating %d bytes for title %d: %s\n", (int)strlen(buf), (int)seq, buf);
+
+ alignment->titles[seq] = (char *)calloc(strlen(buf), sizeof(char));
+ if(!alignment->titles[seq]) {
+ fprintf(stderr, "Clearcut: Memory allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+
+ /* lets forward to the first non-space (space/tab) character after the '>' */
+
+ if(first_sequence_flag) {
+ ptr = buf;
+ } else {
+ ptr = &buf[1];
+ }
+ while(*ptr == '\t' || *ptr == ' ') {
+ ptr++;
+ }
+ sscanf(ptr, "%s", alignment->titles[seq]); /* get the first word and use as the title */
+
+ alignment->nseq++;
+ }
+
+ buf[index++] = c;
+
+ break;
+
+
+ case NJ_FASTA_MODE_SEQUENCE:
+
+ if(c == '>') {
+
+ if(first_sequence_flag) {
+ first_sequence_flag = 0;
+
+ /* allocate our alignment data section here */
+ alignment->length = index-1;
+
+ nseqs = NJ_INITIAL_NSEQS;
+ alignment->data = (char *)calloc(alignment->length*nseqs, sizeof(char));
+ if(!alignment->data) {
+ fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+ }
+
+ if(!first_sequence_flag) {
+ if(index-1 < alignment->length) {
+ fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
+ goto XIT_BAD;
+ }
+ }
+
+ /* re-allocate if necessary */
+ /*
+ if(seq >= nseqs) {
+ nseqs *= 2;
+ alignment->data = (char *)realloc(alignment->data, alignment->length*nseqs*sizeof(char));
+ if(!alignment->data) {
+ fprintf(stderr, "Clearcut: Allocation error in NJ_read_fasta()\n");
+ goto XIT_BAD;
+ }
+ }
+ */
+
+ /* copy buf to alignment */
+ for(i=1;i<=alignment->length;i++) {
+ alignment->data[seq*alignment->length+i-1] = buf[i];
+ }
+
+ state = NJ_FASTA_MODE_TITLE;
+ index = 1;
+ x = 1;
+
+ buf[0] = c;
+
+ seq++;
+
+ } else {
+
+ if(NJ_is_whitespace(c)) {
+ break;
+ }
+
+ if(!first_sequence_flag) {
+ if(index-1 >= alignment->length) {
+ fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
+ goto XIT_BAD;
+ }
+ }
+
+
+ /*
+ * Here we check to make sure that the symbol read is appropriate
+ * for the type of data the user specified. (dna or protein).
+ * We also handle ambiguity codes by converting them to a specific
+ * assigned ambiguity code character. Ambiguity codes are ignored
+ * when computing distances
+ */
+
+ if(nj_args->dna_flag) {
+ if(NJ_is_dna(c)) {
+ buf[index++] = toupper(c);
+ } else {
+ if(NJ_is_dna_ambiguity(c)) {
+ buf[index++] = NJ_AMBIGUITY_CHAR;
+ } else {
+ fprintf(stderr, "Clearcut: Unknown symbol '%c' in nucleotide sequence %ld.\n", c, seq);
+ goto XIT_BAD;
+ }
+ }
+ } else if(nj_args->protein_flag) {
+ if(NJ_is_protein(c)) {
+ buf[index++] = toupper(c);
+ } else {
+ if(NJ_is_protein_ambiguity(c)) {
+ buf[index++] = NJ_AMBIGUITY_CHAR;
+ } else {
+ fprintf(stderr, "Clearcut: Unknown symbol '%c' in protein sequence %ld.\n", c, seq);
+ goto XIT_BAD;
+ }
+ }
+ }
+
+ }
+
+ break;
+
+ default:
+ goto XIT_BAD;
+
+ break;
+
+ }
+ }
+
+ if(index-1 != alignment->length) {
+ fprintf(stderr, "Clearcut: Sequences must be of uniform length in alignment at sequence %ld\n", seq);
+ goto XIT_BAD;
+ }
+
+ /* check for duplicate taxon labels */
+ if(!NJ_taxaname_unique(alignment)) {
+ goto XIT_BAD;
+ }
+
+ return(alignment);
+
+
+ XIT_BAD:
+
+ if(fp) {
+ fprintf(stderr, "Clearcut: Fatal error parsing FASTA file at file offset %ld.\n", ftell(fp));
+ }
+
+ if(buf) {
+ free(buf);
+ }
+
+ NJ_free_alignment(alignment);
+
+ return(NULL);
+}
+
+
+
+
+/*
+ * NJ_print_alignment() - Print multiple sequence alignment (for debugging)
+ *
+ * INPUTS:
+ * -------
+ * alignment -- A pointer to the alignment
+ *
+ * RETURNS:
+ * --------
+ * NONE
+ *
+ */
+void
+NJ_print_alignment(NJ_alignment *alignment) {
+
+ long int i, j;
+
+ printf("nseq = %ld, length = %ld\n", alignment->nseq, alignment->length);
+
+ for(i=0;i<alignment->nseq;i++) {
+
+ printf("> %s\n", alignment->titles[i]);
+
+ for(j=0;j<alignment->length;j++) {
+ printf("%c", alignment->data[i*alignment->length+j]);
+ }
+
+ printf("\n");
+ }
+
+ return;
+}
+
+
+
+
+
+
+
+/*
+ *
+ * NJ_free_alignment() - Free all of the memory allocated for the
+ * multiple sequence alignment
+ *
+ * INPUTS:
+ * -------
+ * alignment -- A pointer to the multiple sequence alignment
+ *
+ * RETURNS:
+ * --------
+ * NONE
+ *
+ */
+void
+NJ_free_alignment(NJ_alignment *alignment) {
+
+ long int i;
+
+ if(alignment) {
+
+ /* free the allocated titles */
+ if(alignment->titles) {
+ for(i=0;i<alignment->nseq;i++) {
+
+ if(alignment->titles[i]) {
+ free(alignment->titles[i]);
+ }
+ }
+
+ free(alignment->titles);
+ }
+
+ /* free the alignment data */
+ if(alignment->data) {
+ free(alignment->data);
+ }
+
+ /* free the alignment itself */
+ free(alignment);
+ }
+
+ return;
+}
+
+
+
+
+/*
+ * NJ_taxaname_unique() - Check to see if taxanames are unique in alignment
+ *
+ * INPUTS:
+ * -------
+ * alignment -- a pointer to a multiple sequence alignment
+ *
+ * OUTPUTS:
+ * --------
+ * int -- 0 if all taxanames in alignment are unique
+ * 1 if all taxanames in alignment are NOT unique
+ *
+ *
+ * DESCRIPTION:
+ * ------------
+ *
+ * Check to see if the taxanames in the alignment are unique. It
+ * will be impossible to make sense of the final tree if the taxon
+ * labels are not unqiue.
+ *
+ */
+int
+NJ_taxaname_unique(NJ_alignment *alignment) {
+
+ long int i, j;
+
+ for(i=0;i<alignment->nseq;i++) {
+ for(j=i+1;j<alignment->nseq;j++) {
+ if(!strcmp(alignment->titles[i],
+ alignment->titles[j])) {
+ fprintf(stderr, "Clearcut: Taxa %ld and %ld (%s) do not have unique taxon labels.\n",
+ i, j, alignment->titles[i]);
+ return(0);
+ }
+ }
+ }
+
+ return(1);
+}
+
+
+void
+NJ_print_titles(NJ_alignment *alignment) {
+
+ int i;
+
+ for(i=0;i<alignment->nseq;i++) {
+ printf("%d: %s\n", i, alignment->titles[i]);
+ }
+
+ return;
+}
+++ /dev/null
-/*
- This getopt_long() is compatible with GNU's, however, added original
- extention (short 1 byte option).
-
-
- Copyright (c) 2004 Koji Arai
-
- Permission is hereby granted, free of charge, to any person
- obtaining a copy of this software and associated documentation files
- (the "Software"), to deal in the Software without restriction,
- including without limitation the rights to use, copy, modify, merge,
- publish, distribute, sublicense, and/or sell copies of the Software,
- and to permit persons to whom the Software is furnished to do so,
- subject to the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-
-
- Compilation for Test:
-
- GNU:
- cc -DUSE_GNU -DDEBUG getopt_long.c -o test_getopt_long_gnu
-
- not GNU:
- cc -I. -DDEBUG getopt_long.c -o test_getopt_long
-
- ./test_getopt_long
- ./test_getopt_long_gnu
-
- BUGS:
- * not implemented any features for getopt() and getopt_long().
-*/
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#if DEBUG
-static int
-puts_argv(char **argv)
-{
- int i;
-
- for (i = 0; argv[i]; i++) {
- if (i) printf(" ");
-
- printf("%s", argv[i]);
- }
- printf("\n");
-
- return 0;
-}
-#endif
-
-#ifndef USE_GNU
-#include <stdio.h>
-#include "getopt_long.h"
-
-char *optarg;
-int optind;
-
-int opterr;
-int optopt;
-
-/*
- return value 0: no option (include '-')
- 1: short option like '-x'
- 2: long option like '--xxx' and just '--'
-*/
-static int
-is_option(char *arg)
-{
- if (arg[0] == '-') {
- switch (arg[1]) {
- case 0: /* just "-" */
- return 0;
- case '-': /* long option (include just "--")*/
- return 2;
- default: /* short option */
- return 1;
- }
- }
- return 0;
-}
-
-static int
-insert_argv(char **argv, int src, int dest)
-{
- int i;
- char *tmp = argv[src];
-
- if (src > dest) {
- for (i = src; i > dest; i--)
- argv[i] = argv[i-1];
- }
- if (src < dest) {
- for (i = src; i < dest; i++)
- argv[i] = argv[i+1];
- }
-
- argv[dest] = tmp;
-
- return 0;
-}
-
-static int
-search_longopt(char *arg, struct option *longopts)
-{
- int i, found = -1;
- int len;
- for (len = 0; arg[len] && arg[len] != '='; len++)
- ;
-
- for (i = 0; longopts[i].name; i++) {
- if (strncmp(arg, longopts[i].name, len) == 0) {
- if (found != -1)
- return -1; /* found some candidate */
- found = i;
- }
- }
- return found;
-}
-
-/*
- * implemented my extention feature.
- * optional 1 byte argument with [...]
- * e.g.) shortopts = "a[0123]b"
- * accepts "-a0 -a1b" (same as "-a0 -a1 -b")
- */
-static int
-has_argument_short(char *arg, const char *shortopts)
-{
- int i;
- int open_bracket = 0;
- for (i = 0; shortopts[i]; i++) {
- switch (shortopts[i]) {
- case '[':
- open_bracket++;
- continue;
- case ']':
- if (open_bracket <= 0) {
- fprintf(stderr, "getopt_long() -- unbalanced bracket in short options");
- return -1;
- }
- open_bracket--;
- continue;
- }
- if (open_bracket) continue;
- if (*arg != shortopts[i]) continue;
-
- switch (shortopts[i+1]) {
- case ':':
- if (shortopts[i+2] != ':') {
- if (arg[1])
- return 1; /* following string is argument */
- else
- return 2; /* next argv is argument */
- }
- else {
- /* '::' means optional argument (GNU extention) */
- if (arg[1])
- return 1;
- else
- return 0; /* no argument */
- }
- case '[':
- if (arg[1] == '\0')
- return 0; /* no argument */
- /* my extention */
- for (i++; shortopts[i] && shortopts[i] != ']'; i++) {
- if (arg[1] == shortopts[i])
- return 3; /* has 1 byte argument */
- }
- if (!shortopts[i]) {
- fprintf(stderr, "getopt_long() -- unbalanced bracket in short options");
- return -1;
- }
- break;
- default:
- return 0; /* no argument */
- }
- }
- /* Invalid option */
- return -1;
-}
-
-static int
-has_argument_long(char *arg, struct option *longopts)
-{
- int i;
-
- i = search_longopt(arg, longopts);
- if (i == -1) {
- /* Invalid option */
- return -1;
- }
- else {
- int len = strlen(arg);
- char *p = strchr(arg, '=');
- if (p) {
- len = p - arg;
- }
-
- switch (longopts[i].has_arg) {
- case no_argument:
- return 0;
- case required_argument:
- if (arg[len] == '=')
- return 1;
- else
- return 2;
- case optional_argument:
- if (arg[len] == '=')
- return 1;
- else
- return 0;
- default:
- assert(0);
- }
- }
-}
-
-/*
- -1: no option
- 0: no argument
- 1: has argument in this argv
- 2: has argument in next argv
- 3: has 1 byte argument in this argv
-*/
-static int
-has_argument(char *arg,
- const char *shortopts,
- struct option *longopts)
-{
- int i, n;
-
- switch (is_option(arg)) {
- case 0: /* no option */
- return -1;
- case 1:
- /* short option */
- n = -1;
- for (i = 1; arg[i]; i++) {
- n = has_argument_short(arg+i, shortopts);
- if (n == 0 && arg[i+1]) continue;
- if (n == 3 && arg[i+2]) { i++; continue; }
- break;
- }
- return n;
- case 2:
- /* long option */
- return has_argument_long(arg+2, longopts);
- break;
- default:
- assert(0);
- }
-}
-
-int
-getopt_long(int argc, char **argv,
- const char *shortopts,
- struct option *longopts,
- int *indexptr)
-{
- char *opt;
- int i;
- static int shortoptind;
- static int no_optind = 0;
-
- if (optind == 0) { /* skip first argument (command name) */
- optind++;
- no_optind = 0;
- shortoptind = 0;
- }
-
- optarg = 0;
-
- if (no_optind && !shortoptind) {
- while (!is_option(argv[no_optind]))
- insert_argv(argv, no_optind, optind-1);
-
- if (has_argument(argv[no_optind], shortopts, longopts) == 2)
- no_optind += 2;
- else
- no_optind++;
-
- if (argv[optind] && strcmp(argv[optind], "--") == 0) {
- while (!is_option(argv[no_optind]))
- insert_argv(argv, no_optind, optind);
- optind = no_optind;
- no_optind = 0;
- }
- }
-
- if (optind >= argc)
- goto end_of_option;
-
- retry:
- /*
- puts_argv(&argv[optind]);
- */
- opt = argv[optind];
-
- if (shortoptind == 0 && is_option(opt) == 1) {
- shortoptind++;
- }
-
- if (shortoptind) {
- /* short option */
- char *p = &opt[shortoptind];
-
- if (*p == '\0')
- assert(0);
-
- switch (has_argument_short(p, shortopts)) {
- case 0:
- /* no argument */
- optarg = 0;
-
- shortoptind++;
- if (opt[shortoptind] == '\0')
- optind++, shortoptind = 0;
- return *p;
- case 1:
- /* following character is argument */
- optind++, shortoptind = 0;
- optarg = &p[1];
- return *p;
- case 2:
- /* next argv is argument */
- optind++, shortoptind = 0;
- optarg = argv[optind++];
- return *p;
- case 3:
- /* has 1 byte argument */
- optarg = &p[1];
- if (p[2] == 0)
- optind++, shortoptind = 0;
- else
- shortoptind += 2;
- return *p;
- default:
- /* Invalid option */
- if (opterr)
- fprintf(stderr,
- "%s: invalid option -- %c\n",
- argv[0],
- *p);
-
- optind++, shortoptind = 0;
- optopt = *p;
- return '?';
- }
- }
- else if (opt[0] == '-' && opt[1] == '-') {
- /* long option */
-
- if (opt[2] == '\0') {
- /* end of command line switch */
- optind++;
- return -1;
- }
-
- opt += 2;
-
- i = search_longopt(opt, longopts);
- if (i == -1) {
- optind++;
- optopt = 0;
- return '?';
- }
- else {
- int len = strlen(opt);
- char *p = strchr(opt, '=');
- if (p) {
- len = p - opt;
- }
-
- switch (longopts[i].has_arg) {
- case no_argument:
- break;
- case required_argument:
- if (opt[len] == '=')
- optarg = opt + len + 1;
- else {
- optind++;
- optarg = argv[optind];
- if (optarg == 0) {
- if (opterr)
- fprintf(stderr,
- "%s: option `--%s' requires an argument\n",
- argv[0],
- opt);
-
- optopt = 0;
- return '?'; /* no argument */
- }
- }
- break;
- case optional_argument:
- if (opt[len] == '=')
- optarg = opt + len + 1;
- else {
- optarg = 0;
- }
- break;
- default:
- break;
- }
-
- *indexptr = i;
- optind++;
- if (longopts[i].flag) {
- *longopts[i].flag = longopts[i].val;
- return 0;
- }
- else {
- return longopts[i].val;
- }
- }
-
- optind++;
- optopt = 0;
- return '?';
- }
-
- /* not option */
- if (no_optind == 0)
- no_optind = optind;
-
- for (i = optind; argv[i]; i++) {
- if (is_option(argv[i])) {
- optind = i;
- goto retry;
- }
- }
-
- end_of_option:
- if (no_optind) {
- optind = no_optind;
- no_optind = 0;
- }
-
- return -1;
-}
-#endif /* USE_GNU */
-
-#if DEBUG
-
-#include <stdio.h>
-#include <assert.h>
-#include <stdlib.h>
-
-#if USE_GNU
-#include <getopt.h> /* use GNU getopt_long() */
-#endif
-
-static int verbose_flag;
-static int option_index;
-int argc;
-char *argv[50];
-char **p;
-int c;
-static struct option long_options[] = {
- {"verbose", no_argument, &verbose_flag, 1},
- {"brief", no_argument, &verbose_flag, 0},
- {"add", required_argument, 0, 'a'},
- {"append", no_argument, 0, 0},
- {"delete", required_argument, 0, 0},
- {"create", optional_argument, 0, 0},
- {"change", optional_argument, 0, 0},
- {0, 0, 0, 0}
-};
-
-int
-call_getopt_long(int argc, char **argv,
- const char *shortopts,
- struct option *longopts,
- int *indexptr)
-{
- int c;
- c = getopt_long(argc, argv, shortopts, longopts, indexptr);
- puts_argv(argv);
- printf("ret=%d(%c) option_index=%d ", c, c, option_index);
- printf("optind=%d optarg=[%s] opterr=%d optopt=%d(%c)\n",
- optind, optarg, opterr, optopt, optopt);
- if (c == 0) {
- struct option *opt;
- opt = &longopts[*indexptr];
- printf("long option: --%s has_arg=%d\n", opt->name, opt->has_arg);
- if (opt->flag)
- printf(" flag=[%8p] val=%d\n", opt->flag, *opt->flag);
- }
-
- return c;
-}
-
-void
-test1()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-bcd";
- argc++; *p++ = "-d";
- argc++; *p++ = "-e";
- argc++; *p++ = "-f";
- argc++; *p++ = "-g";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'a');
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'b');
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'c');
- assert(option_index == 0);
- assert(optind == 3);
- assert(optarg == &argv[2][3]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'd');
- assert(option_index == 0);
- assert(optind == 5);
- assert(optarg == argv[4]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == '?');
- assert(option_index == 0);
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 'f');
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == '?');
- assert(option_index == 0);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 'g');
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == -1);
- assert(option_index == 0);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
-}
-
-void
-test2()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--verbose";
- argc++; *p++ = "--brief";
- argc++; *p++ = "--add";
- argc++; *p++ = "add_argument";
- argc++; *p++ = "--add=add_argument";
- argc++; *p++ = "--append";
- argc++; *p++ = "--delete=del_argument";
- argc++; *p++ = "--create=cre_argument";
- argc++; *p++ = "--create";
- argc++; *p++ = "files...";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "verbose") == 0);
- assert(*long_options[option_index].flag == 1);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 1);
- assert(optind == 3);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "brief") == 0);
- assert(*long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'a');
- assert(option_index == 2);
- assert(optind == 5);
- assert(optarg == argv[4]);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "add") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 'a');
- assert(option_index == 2);
- assert(optind == 6);
- assert(optarg == argv[5]+6);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "add") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 3);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "append") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 4);
- assert(optind == 8);
- assert(optarg == argv[7]+9);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "delete") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 9);
- assert(optarg == argv[8]+9);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "create") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 10);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(long_options[option_index].name, "create") == 0);
- assert(long_options[option_index].flag == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 10);
- assert(optarg == 0);
- assert(optopt == 'g'); /* no changed */
- assert(strcmp(argv[optind], "files...") == 0);
-
-}
-
-void
-test3()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--delete"; /* required argument has no argument */
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == '?');
- assert(option_index == 5); /* no changed */
- assert(optind == 2); /* changed */
- assert(optarg == 0);
- assert(optopt == 0); /* changed */
- assert(argv[optind] == 0);
-
- /* */
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--file"; /* not option */
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- assert(c == '?');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
- assert(argv[optind] == 0);
-}
-
-void
-test4()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "a1";
- argc++; *p++ = "a2";
- argc++; *p++ = "-b";
- argc++; *p++ = "b";
- argc++; *p++ = "-efg"; /* some options in a argument */
- argc++; *p++ = "g";
- argc++; *p++ = "-c";
- argc++; *p++ = "c";
- argc++; *p++ = "d";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'b');
- assert(option_index == 5); /* no changed */
- assert(optind == 5);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'e');
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'f');
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'g');
- assert(option_index == 5); /* no changed */
- assert(optind == 8);
- assert(optarg == argv[7]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 10);
- assert(optarg == argv[9]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "-efg") == 0);
- assert(strcmp(*p++, "g") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "a1") == 0);
- assert(strcmp(*p++, "a2") == 0);
- assert(strcmp(*p++, "b") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
-}
-
-void
-test5()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-";
- argc++; *p++ = "-b";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(*p == 0);
-
- assert(c == 'b');
- assert(option_index == 5); /* no changed */
- assert(optind == 4);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 3);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test6()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-";
- argc++; *p++ = "-";
- argc++; *p++ = "-b";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(*p == 0);
-
- assert(c == 'b');
- assert(option_index == 5); /* no changed */
- assert(optind == 5);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-b") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 3);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test7()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-";
- argc++; *p++ = "-";
- argc++; *p++ = "-c";
- argc++; *p++ = "c";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == argv[5]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 4);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test8()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-c";
- argc++; *p++ = "c";
- argc++; *p++ = "--";
- argc++; *p++ = "-d";
- argc++; *p++ = "d";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 4);
- assert(optarg == argv[3]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 5);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 5); /* no changed */
- assert(optind == 7);
- assert(optarg == argv[6]);
- assert(optopt == 0);
-}
-
-void
-test9()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-";
- argc++; *p++ = "-";
- argc++; *p++ = "-c";
- argc++; *p++ = "c";
- argc++; *p++ = "--";
- argc++; *p++ = "-d";
- argc++; *p++ = "d";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == argv[5]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 5);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "c") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 5); /* no changed */
- assert(optind == 9);
- assert(optarg == argv[8]);
- assert(optopt == 0);
-}
-
-void
-test10()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-cc";
- argc++; *p++ = "-d";
- argc++; *p++ = "d";
- argc++; *p++ = "-c"; /* no argument */
- argc++; *p++ = "-d"; /* at last */
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 5); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 3);
- assert(optarg == argv[2]+2);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 5); /* no changed */
- assert(optind == 4);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 5); /* no changed */
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-cc") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "-c") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 5); /* no changed */
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test11()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--verbose";
- argc++; *p++ = "--create=c";
- argc++; *p++ = "--change";
- argc++; *p++ = "d";
- argc++; *p++ = "--create"; /* no argument */
- argc++; *p++ = "--change"; /* at last */
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 3);
- assert(optarg == argv[2]+9);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 6);
- assert(optind == 4);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 6);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 6);
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test12()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--verbose";
- argc++; *p++ = "--create=c";
- argc++; *p++ = "files...";
- argc++; *p++ = "--delete"; /* required argument */
- argc++; *p++ = "d";
- argc++; *p++ = "--create"; /* no argument */
- argc++; *p++ = "--change"; /* at last */
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 3);
- assert(optarg == argv[2]+9);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 4);
- assert(optind == 6);
- assert(optarg == argv[5]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 6);
- assert(optind == 8);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--create") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 6);
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-}
-
-void
-test13()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "--verbose";
- argc++; *p++ = "--create=c";
- argc++; *p++ = "files...";
- argc++; *p++ = "--delete";
- argc++; *p++ = "d";
- argc++; *p++ = "--"; /* option terminator */
- argc++; *p++ = "--change";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 0);
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 5);
- assert(optind == 3);
- assert(optarg == argv[2]+9);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == 0);
- assert(option_index == 4);
- assert(optind == 6);
- assert(optarg == argv[5]);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "--verbose") == 0);
- assert(strcmp(*p++, "--create=c") == 0);
- assert(strcmp(*p++, "--delete") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "--") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(strcmp(*p++, "--change") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 4);
- assert(optind == 6);
- assert(optarg == 0);
- assert(optopt == 0);
-
-}
-
-void
-test14()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-o5";
- argc++; *p++ = "files...";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "o[567]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-o5") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(*p == 0);
-
- assert(c == 'o');
- assert(option_index == 4); /* no changed */
- assert(optind == 2);
- assert(optarg == argv[1]+2);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-o5") == 0);
- assert(strcmp(*p++, "files...") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 4); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
-}
-
-void
-test15()
-{
- optind = 0;
- argc = 0;
- p = argv;
-
- argc++; *p++ = "command_name";
- argc++; *p++ = "-a";
- argc++; *p++ = "-ccd";
- argc++; *p++ = "-ce";
- argc++; *p++ = "-d";
- argc++; *p++ = "d";
- argc++; *p++ = "-cdd";
- argc++; *p++ = "-d";
- *p = 0;
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'a');
- assert(option_index == 4); /* no changed */
- assert(optind == 2);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 4); /* no changed */
- assert(optind == 2);
- assert(optarg == argv[2]+2);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 4); /* no changed */
- assert(optind == 3);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 4); /* no changed */
- assert(optind == 4);
- assert(optarg == argv[3]+2);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 4); /* no changed */
- assert(optind == 5);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'c');
- assert(option_index == 4); /* no changed */
- assert(optind == 6);
- assert(optarg == argv[6]+2);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 4); /* no changed */
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(*p == 0);
-
- assert(c == 'd');
- assert(option_index == 4); /* no changed */
- assert(optind == 8);
- assert(optarg == 0);
- assert(optopt == 0);
-
-
- /*************************/
- c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
-
- p = argv;
- assert(strcmp(*p++, "command_name") == 0);
- assert(strcmp(*p++, "-a") == 0);
- assert(strcmp(*p++, "-ccd") == 0);
- assert(strcmp(*p++, "-ce") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "-cdd") == 0);
- assert(strcmp(*p++, "-d") == 0);
- assert(strcmp(*p++, "d") == 0);
- assert(*p == 0);
-
- assert(c == -1);
- assert(option_index == 4); /* no changed */
- assert(optind == 7);
- assert(optarg == 0);
- assert(optopt == 0);
-
-
-}
-
-int
-main()
-{
- opterr = 0;
- optopt = 0;
-
- test1();
- test2();
- test3();
- test4();
- test5();
- test6();
- test7();
- test8();
- test9();
- test10();
- test11();
- test12();
- test13();
-#ifndef USE_GNU
- test14();
- test15();
-#endif
-
- return 0;
-}
-#endif
--- /dev/null
+/*
+ This getopt_long() is compatible with GNU's, however, added original
+ extention (short 1 byte option).
+
+
+ Copyright (c) 2004 Koji Arai
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation files
+ (the "Software"), to deal in the Software without restriction,
+ including without limitation the rights to use, copy, modify, merge,
+ publish, distribute, sublicense, and/or sell copies of the Software,
+ and to permit persons to whom the Software is furnished to do so,
+ subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+
+ Compilation for Test:
+
+ GNU:
+ cc -DUSE_GNU -DDEBUG getopt_long.c -o test_getopt_long_gnu
+
+ not GNU:
+ cc -I. -DDEBUG getopt_long.c -o test_getopt_long
+
+ ./test_getopt_long
+ ./test_getopt_long_gnu
+
+ BUGS:
+ * not implemented any features for getopt() and getopt_long().
+*/
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#if DEBUG
+static int
+puts_argv(char **argv)
+{
+ int i;
+
+ for (i = 0; argv[i]; i++) {
+ if (i) printf(" ");
+
+ printf("%s", argv[i]);
+ }
+ printf("\n");
+
+ return 0;
+}
+#endif
+
+#ifndef USE_GNU
+#include <stdio.h>
+#include "getopt_long.h"
+
+char *optarg;
+int optind;
+
+int opterr;
+int optopt;
+
+/*
+ return value 0: no option (include '-')
+ 1: short option like '-x'
+ 2: long option like '--xxx' and just '--'
+*/
+static int
+is_option(char *arg)
+{
+ if (arg[0] == '-') {
+ switch (arg[1]) {
+ case 0: /* just "-" */
+ return 0;
+ case '-': /* long option (include just "--")*/
+ return 2;
+ default: /* short option */
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int
+insert_argv(char **argv, int src, int dest)
+{
+ int i;
+ char *tmp = argv[src];
+
+ if (src > dest) {
+ for (i = src; i > dest; i--)
+ argv[i] = argv[i-1];
+ }
+ if (src < dest) {
+ for (i = src; i < dest; i++)
+ argv[i] = argv[i+1];
+ }
+
+ argv[dest] = tmp;
+
+ return 0;
+}
+
+static int
+search_longopt(char *arg, struct option *longopts)
+{
+ int i, found = -1;
+ int len;
+ for (len = 0; arg[len] && arg[len] != '='; len++)
+ ;
+
+ for (i = 0; longopts[i].name; i++) {
+ if (strncmp(arg, longopts[i].name, len) == 0) {
+ if (found != -1)
+ return -1; /* found some candidate */
+ found = i;
+ }
+ }
+ return found;
+}
+
+/*
+ * implemented my extention feature.
+ * optional 1 byte argument with [...]
+ * e.g.) shortopts = "a[0123]b"
+ * accepts "-a0 -a1b" (same as "-a0 -a1 -b")
+ */
+static int
+has_argument_short(char *arg, const char *shortopts)
+{
+ int i;
+ int open_bracket = 0;
+ for (i = 0; shortopts[i]; i++) {
+ switch (shortopts[i]) {
+ case '[':
+ open_bracket++;
+ continue;
+ case ']':
+ if (open_bracket <= 0) {
+ fprintf(stderr, "getopt_long() -- unbalanced bracket in short options");
+ return -1;
+ }
+ open_bracket--;
+ continue;
+ }
+ if (open_bracket) continue;
+ if (*arg != shortopts[i]) continue;
+
+ switch (shortopts[i+1]) {
+ case ':':
+ if (shortopts[i+2] != ':') {
+ if (arg[1])
+ return 1; /* following string is argument */
+ else
+ return 2; /* next argv is argument */
+ }
+ else {
+ /* '::' means optional argument (GNU extention) */
+ if (arg[1])
+ return 1;
+ else
+ return 0; /* no argument */
+ }
+ case '[':
+ if (arg[1] == '\0')
+ return 0; /* no argument */
+ /* my extention */
+ for (i++; shortopts[i] && shortopts[i] != ']'; i++) {
+ if (arg[1] == shortopts[i])
+ return 3; /* has 1 byte argument */
+ }
+ if (!shortopts[i]) {
+ fprintf(stderr, "getopt_long() -- unbalanced bracket in short options");
+ return -1;
+ }
+ break;
+ default:
+ return 0; /* no argument */
+ }
+ }
+ /* Invalid option */
+ return -1;
+}
+
+static int
+has_argument_long(char *arg, struct option *longopts)
+{
+ int i;
+
+ i = search_longopt(arg, longopts);
+ if (i == -1) {
+ /* Invalid option */
+ return -1;
+ }
+ else {
+ int len = strlen(arg);
+ char *p = strchr(arg, '=');
+ if (p) {
+ len = p - arg;
+ }
+
+ switch (longopts[i].has_arg) {
+ case no_argument:
+ return 0;
+ case required_argument:
+ if (arg[len] == '=')
+ return 1;
+ else
+ return 2;
+ case optional_argument:
+ if (arg[len] == '=')
+ return 1;
+ else
+ return 0;
+ default:
+ assert(0);
+ }
+ }
+}
+
+/*
+ -1: no option
+ 0: no argument
+ 1: has argument in this argv
+ 2: has argument in next argv
+ 3: has 1 byte argument in this argv
+*/
+static int
+has_argument(char *arg,
+ const char *shortopts,
+ struct option *longopts)
+{
+ int i, n;
+
+ switch (is_option(arg)) {
+ case 0: /* no option */
+ return -1;
+ case 1:
+ /* short option */
+ n = -1;
+ for (i = 1; arg[i]; i++) {
+ n = has_argument_short(arg+i, shortopts);
+ if (n == 0 && arg[i+1]) continue;
+ if (n == 3 && arg[i+2]) { i++; continue; }
+ break;
+ }
+ return n;
+ case 2:
+ /* long option */
+ return has_argument_long(arg+2, longopts);
+ break;
+ default:
+ assert(0);
+ }
+}
+
+int
+getopt_long(int argc, char **argv,
+ const char *shortopts,
+ struct option *longopts,
+ int *indexptr)
+{
+ char *opt;
+ int i;
+ static int shortoptind;
+ static int no_optind = 0;
+
+ if (optind == 0) { /* skip first argument (command name) */
+ optind++;
+ no_optind = 0;
+ shortoptind = 0;
+ }
+
+ optarg = 0;
+
+ if (no_optind && !shortoptind) {
+ while (!is_option(argv[no_optind]))
+ insert_argv(argv, no_optind, optind-1);
+
+ if (has_argument(argv[no_optind], shortopts, longopts) == 2)
+ no_optind += 2;
+ else
+ no_optind++;
+
+ if (argv[optind] && strcmp(argv[optind], "--") == 0) {
+ while (!is_option(argv[no_optind]))
+ insert_argv(argv, no_optind, optind);
+ optind = no_optind;
+ no_optind = 0;
+ }
+ }
+
+ if (optind >= argc)
+ goto end_of_option;
+
+ retry:
+ /*
+ puts_argv(&argv[optind]);
+ */
+ opt = argv[optind];
+ if (shortoptind == 0 && is_option(opt) == 1) {
+ shortoptind++;
+ }
+
+ if (shortoptind) {
+ /* short option */
+ char *p = &opt[shortoptind];
+
+ if (*p == '\0')
+ assert(0);
+
+ switch (has_argument_short(p, shortopts)) {
+ case 0:
+ /* no argument */
+ optarg = 0;
+
+ shortoptind++;
+ if (opt[shortoptind] == '\0')
+ optind++, shortoptind = 0;
+ return *p;
+ case 1:
+ /* following character is argument */
+ optind++, shortoptind = 0;
+ optarg = &p[1];
+ return *p;
+ case 2:
+ /* next argv is argument */
+ optind++, shortoptind = 0;
+ optarg = argv[optind++];
+ return *p;
+ case 3:
+ /* has 1 byte argument */
+ optarg = &p[1];
+ if (p[2] == 0)
+ optind++, shortoptind = 0;
+ else
+ shortoptind += 2;
+ return *p;
+ default:
+ /* Invalid option */
+ if (opterr)
+ fprintf(stderr,
+ "%s: invalid option -- %c\n",
+ argv[0],
+ *p);
+
+ optind++, shortoptind = 0;
+ optopt = *p;
+ return '?';
+ }
+ }
+ else if (opt[0] == '-' && opt[1] == '-') {
+ /* long option */
+
+ if (opt[2] == '\0') {
+ /* end of command line switch */
+ optind++;
+ return -1;
+ }
+
+ opt += 2;
+
+ i = search_longopt(opt, longopts);
+ if (i == -1) {
+ optind++;
+ optopt = 0;
+ return '?';
+ }
+ else {
+ int len = strlen(opt);
+ char *p = strchr(opt, '=');
+ if (p) {
+ len = p - opt;
+ }
+
+ switch (longopts[i].has_arg) {
+ case no_argument:
+ break;
+ case required_argument:
+ if (opt[len] == '=')
+ optarg = opt + len + 1;
+ else {
+ optind++;
+ optarg = argv[optind];
+ if (optarg == 0) {
+ if (opterr)
+ fprintf(stderr,
+ "%s: option `--%s' requires an argument\n",
+ argv[0],
+ opt);
+
+ optopt = 0;
+ return '?'; /* no argument */
+ }
+ }
+ break;
+ case optional_argument:
+ if (opt[len] == '=')
+ optarg = opt + len + 1;
+ else {
+ optarg = 0;
+ }
+ break;
+ default:
+ break;
+ }
+
+ *indexptr = i;
+ optind++;
+ if (longopts[i].flag) {
+ *longopts[i].flag = longopts[i].val;
+ return 0;
+ }
+ else {
+ return longopts[i].val;
+ }
+ }
+
+ optind++;
+ optopt = 0;
+ return '?';
+ }
+
+ /* not option */
+ if (no_optind == 0)
+ no_optind = optind;
+
+ for (i = optind; argv[i]; i++) {
+ if (is_option(argv[i])) {
+ optind = i;
+ goto retry;
+ }
+ }
+
+ end_of_option:
+ if (no_optind) {
+ optind = no_optind;
+ no_optind = 0;
+ }
+
+ return -1;
+}
+#endif /* USE_GNU */
+
+#if DEBUG
+
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+
+#if USE_GNU
+#include <getopt.h> /* use GNU getopt_long() */
+#endif
+
+static int verbose_flag;
+static int option_index;
+int argc;
+char *argv[50];
+char **p;
+int c;
+static struct option long_options[] = {
+ {"verbose", no_argument, &verbose_flag, 1},
+ {"brief", no_argument, &verbose_flag, 0},
+ {"add", required_argument, 0, 'a'},
+ {"append", no_argument, 0, 0},
+ {"delete", required_argument, 0, 0},
+ {"create", optional_argument, 0, 0},
+ {"change", optional_argument, 0, 0},
+ {0, 0, 0, 0}
+};
+
+int
+call_getopt_long(int argc, char **argv,
+ const char *shortopts,
+ struct option *longopts,
+ int *indexptr)
+{
+ int c;
+ c = getopt_long(argc, argv, shortopts, longopts, indexptr);
+ puts_argv(argv);
+ printf("ret=%d(%c) option_index=%d ", c, c, option_index);
+ printf("optind=%d optarg=[%s] opterr=%d optopt=%d(%c)\n",
+ optind, optarg, opterr, optopt, optopt);
+ if (c == 0) {
+ struct option *opt;
+ opt = &longopts[*indexptr];
+ printf("long option: --%s has_arg=%d\n", opt->name, opt->has_arg);
+ if (opt->flag)
+ printf(" flag=[%8p] val=%d\n", opt->flag, *opt->flag);
+ }
+
+ return c;
+}
+
+void
+test1()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-bcd";
+ argc++; *p++ = "-d";
+ argc++; *p++ = "-e";
+ argc++; *p++ = "-f";
+ argc++; *p++ = "-g";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'a');
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'b');
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'c');
+ assert(option_index == 0);
+ assert(optind == 3);
+ assert(optarg == &argv[2][3]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'd');
+ assert(option_index == 0);
+ assert(optind == 5);
+ assert(optarg == argv[4]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == '?');
+ assert(option_index == 0);
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 'f');
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == '?');
+ assert(option_index == 0);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 'g');
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == -1);
+ assert(option_index == 0);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+}
+
+void
+test2()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--verbose";
+ argc++; *p++ = "--brief";
+ argc++; *p++ = "--add";
+ argc++; *p++ = "add_argument";
+ argc++; *p++ = "--add=add_argument";
+ argc++; *p++ = "--append";
+ argc++; *p++ = "--delete=del_argument";
+ argc++; *p++ = "--create=cre_argument";
+ argc++; *p++ = "--create";
+ argc++; *p++ = "files...";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "verbose") == 0);
+ assert(*long_options[option_index].flag == 1);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 1);
+ assert(optind == 3);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "brief") == 0);
+ assert(*long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'a');
+ assert(option_index == 2);
+ assert(optind == 5);
+ assert(optarg == argv[4]);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "add") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 'a');
+ assert(option_index == 2);
+ assert(optind == 6);
+ assert(optarg == argv[5]+6);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "add") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 3);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "append") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 4);
+ assert(optind == 8);
+ assert(optarg == argv[7]+9);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "delete") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 9);
+ assert(optarg == argv[8]+9);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "create") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 10);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(long_options[option_index].name, "create") == 0);
+ assert(long_options[option_index].flag == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 10);
+ assert(optarg == 0);
+ assert(optopt == 'g'); /* no changed */
+ assert(strcmp(argv[optind], "files...") == 0);
+
+}
+
+void
+test3()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--delete"; /* required argument has no argument */
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == '?');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2); /* changed */
+ assert(optarg == 0);
+ assert(optopt == 0); /* changed */
+ assert(argv[optind] == 0);
+
+ /* */
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--file"; /* not option */
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ assert(c == '?');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+ assert(argv[optind] == 0);
+}
+
+void
+test4()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "a1";
+ argc++; *p++ = "a2";
+ argc++; *p++ = "-b";
+ argc++; *p++ = "b";
+ argc++; *p++ = "-efg"; /* some options in a argument */
+ argc++; *p++ = "g";
+ argc++; *p++ = "-c";
+ argc++; *p++ = "c";
+ argc++; *p++ = "d";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'b');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 5);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'e');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'f');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'g');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 8);
+ assert(optarg == argv[7]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 10);
+ assert(optarg == argv[9]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:efg:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "-efg") == 0);
+ assert(strcmp(*p++, "g") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "a1") == 0);
+ assert(strcmp(*p++, "a2") == 0);
+ assert(strcmp(*p++, "b") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+}
+
+void
+test5()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-b";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(*p == 0);
+
+ assert(c == 'b');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 4);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 3);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test6()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-b";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(*p == 0);
+
+ assert(c == 'b');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 5);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-b") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 3);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test7()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-c";
+ argc++; *p++ = "c";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == argv[5]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 4);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test8()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-c";
+ argc++; *p++ = "c";
+ argc++; *p++ = "--";
+ argc++; *p++ = "-d";
+ argc++; *p++ = "d";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 4);
+ assert(optarg == argv[3]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 5);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 7);
+ assert(optarg == argv[6]);
+ assert(optopt == 0);
+}
+
+void
+test9()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-";
+ argc++; *p++ = "-c";
+ argc++; *p++ = "c";
+ argc++; *p++ = "--";
+ argc++; *p++ = "-d";
+ argc++; *p++ = "d";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == argv[5]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 5);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc:d:", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "c") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 9);
+ assert(optarg == argv[8]);
+ assert(optopt == 0);
+}
+
+void
+test10()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-cc";
+ argc++; *p++ = "-d";
+ argc++; *p++ = "d";
+ argc++; *p++ = "-c"; /* no argument */
+ argc++; *p++ = "-d"; /* at last */
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 3);
+ assert(optarg == argv[2]+2);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 4);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 5); /* no changed */
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-cc") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "-c") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 5); /* no changed */
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test11()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--verbose";
+ argc++; *p++ = "--create=c";
+ argc++; *p++ = "--change";
+ argc++; *p++ = "d";
+ argc++; *p++ = "--create"; /* no argument */
+ argc++; *p++ = "--change"; /* at last */
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 3);
+ assert(optarg == argv[2]+9);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 6);
+ assert(optind == 4);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 6);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 6);
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test12()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--verbose";
+ argc++; *p++ = "--create=c";
+ argc++; *p++ = "files...";
+ argc++; *p++ = "--delete"; /* required argument */
+ argc++; *p++ = "d";
+ argc++; *p++ = "--create"; /* no argument */
+ argc++; *p++ = "--change"; /* at last */
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 3);
+ assert(optarg == argv[2]+9);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 4);
+ assert(optind == 6);
+ assert(optarg == argv[5]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 6);
+ assert(optind == 8);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--create") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 6);
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+}
+
+void
+test13()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "--verbose";
+ argc++; *p++ = "--create=c";
+ argc++; *p++ = "files...";
+ argc++; *p++ = "--delete";
+ argc++; *p++ = "d";
+ argc++; *p++ = "--"; /* option terminator */
+ argc++; *p++ = "--change";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 0);
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 5);
+ assert(optind == 3);
+ assert(optarg == argv[2]+9);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == 0);
+ assert(option_index == 4);
+ assert(optind == 6);
+ assert(optarg == argv[5]);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc::d::", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "--verbose") == 0);
+ assert(strcmp(*p++, "--create=c") == 0);
+ assert(strcmp(*p++, "--delete") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "--") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(strcmp(*p++, "--change") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 4);
+ assert(optind == 6);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+}
+
+void
+test14()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-o5";
+ argc++; *p++ = "files...";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "o[567]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-o5") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(*p == 0);
+
+ assert(c == 'o');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 2);
+ assert(optarg == argv[1]+2);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-o5") == 0);
+ assert(strcmp(*p++, "files...") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 4); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+}
+
+void
+test15()
+{
+ optind = 0;
+ argc = 0;
+ p = argv;
+
+ argc++; *p++ = "command_name";
+ argc++; *p++ = "-a";
+ argc++; *p++ = "-ccd";
+ argc++; *p++ = "-ce";
+ argc++; *p++ = "-d";
+ argc++; *p++ = "d";
+ argc++; *p++ = "-cdd";
+ argc++; *p++ = "-d";
+ *p = 0;
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'a');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 2);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 2);
+ assert(optarg == argv[2]+2);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 3);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 4);
+ assert(optarg == argv[3]+2);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 5);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'c');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 6);
+ assert(optarg == argv[6]+2);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(*p == 0);
+
+ assert(c == 'd');
+ assert(option_index == 4); /* no changed */
+ assert(optind == 8);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+
+ /*************************/
+ c = call_getopt_long(argc, argv, "abc[cde]d[fgh]", long_options, &option_index);
+
+ p = argv;
+ assert(strcmp(*p++, "command_name") == 0);
+ assert(strcmp(*p++, "-a") == 0);
+ assert(strcmp(*p++, "-ccd") == 0);
+ assert(strcmp(*p++, "-ce") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "-cdd") == 0);
+ assert(strcmp(*p++, "-d") == 0);
+ assert(strcmp(*p++, "d") == 0);
+ assert(*p == 0);
+
+ assert(c == -1);
+ assert(option_index == 4); /* no changed */
+ assert(optind == 7);
+ assert(optarg == 0);
+ assert(optopt == 0);
+
+
+}
+
+int
+main()
+{
+ opterr = 0;
+ optopt = 0;
+
+ test1();
+ test2();
+ test3();
+ test4();
+ test5();
+ test6();
+ test7();
+ test8();
+ test9();
+ test10();
+ test11();
+ test12();
+ test13();
+#ifndef USE_GNU
+ test14();
+ test15();
+#endif
+
+ return 0;
+}
+#endif
/***********************************************************************/
int main(int argc, char *argv[]){
MothurOut* m = MothurOut::getInstance();
- try {
-
+ try {
signal(SIGINT, ctrlc_handler );
time_t ltime = time(NULL); /* calendar time */
Engine* mothur;
bool bail = 0;
string input;
-
+
if(argc>1){
input = argv[1];
+++ /dev/null
-/*
- A C-program for MT19937, with initialization improved 2002/1/26.
- Coded by Takuji Nishimura and Makoto Matsumoto.
-
- Before using, initialize the state by using init_genrand(seed)
- or init_by_array(init_key, key_length).
-
- Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
- All rights reserved.
-
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions
- are met:
-
- 1. Redistributions of source code must retain the above copyright
- notice, this list of conditions and the following disclaimer.
-
- 2. Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
-
- 3. The names of its contributors may not be used to endorse or promote
- products derived from this software without specific prior written
- permission.
-
- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-
- Any feedback is very welcome.
- http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
- email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
-*/
-
-#include <stdio.h>
-
-#include "prng.h"
-
-/* Period parameters */
-#define N 624
-#define M 397
-#define MATRIX_A 0x9908b0dfUL /* constant vector a */
-#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
-#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
-
-#define NJ_RAND_MAX 0x7fffffffUL
-
-
-static unsigned long mt[N]; /* the array for the state vector */
-static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
-
-/* initializes mt[N] with a seed */
-void init_genrand(unsigned long s)
-{
- mt[0]= s & 0xffffffffUL;
- for (mti=1; mti<N; mti++) {
- mt[mti] =
- (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
- /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
- /* In the previous versions, MSBs of the seed affect */
- /* only MSBs of the array mt[]. */
- /* 2002/01/09 modified by Makoto Matsumoto */
- mt[mti] &= 0xffffffffUL;
- /* for >32 bit machines */
- }
-}
-
-/* initialize by an array with array-length */
-/* init_key is the array for initializing keys */
-/* key_length is its length */
-/* slight change for C++, 2004/2/26 */
-void init_by_array(unsigned long init_key[], int key_length)
-{
- int i, j, k;
- init_genrand(19650218UL);
- i=1; j=0;
- k = (N>key_length ? N : key_length);
- for (; k; k--) {
- mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
- + init_key[j] + j; /* non linear */
- mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
- i++; j++;
- if (i>=N) { mt[0] = mt[N-1]; i=1; }
- if (j>=key_length) j=0;
- }
- for (k=N-1; k; k--) {
- mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
- - i; /* non linear */
- mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
- i++;
- if (i>=N) { mt[0] = mt[N-1]; i=1; }
- }
-
- mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
-}
-
-/* generates a random number on [0,0xffffffff]-interval */
-unsigned long genrand_int32(void)
-{
- unsigned long y;
- static unsigned long mag01[2]={0x0UL, MATRIX_A};
- /* mag01[x] = x * MATRIX_A for x=0,1 */
-
- if (mti >= N) { /* generate N words at one time */
- int kk;
-
- if (mti == N+1) /* if init_genrand() has not been called, */
- init_genrand(5489UL); /* a default initial seed is used */
-
- for (kk=0;kk<N-M;kk++) {
- y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
- mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
- }
- for (;kk<N-1;kk++) {
- y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
- mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
- }
- y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
- mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
-
- mti = 0;
- }
-
- y = mt[mti++];
-
- /* Tempering */
- y ^= (y >> 11);
- y ^= (y << 7) & 0x9d2c5680UL;
- y ^= (y << 15) & 0xefc60000UL;
- y ^= (y >> 18);
-
- return y;
-}
-
-/* generates a random number on [0,0x7fffffff]-interval */
-long int genrand_int31(void)
-{
- return (long)(genrand_int32()>>1);
-}
-
-/* These real versions are due to Isaku Wada, 2002/01/09 added */
-
-/* generates a random number on [0,1]-real-interval */
-double genrand_real1(void)
-{
- return genrand_int32()*(1.0/4294967295.0);
- /* divided by 2^32-1 */
-}
-
-/* generates a random number on [0,1)-real-interval */
-double genrand_real2(void)
-{
- return genrand_int32()*(1.0/4294967296.0);
- /* divided by 2^32 */
-}
-
-/* generates a random number on (0,1)-real-interval */
-double genrand_real3(void)
-{
- return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0);
- /* divided by 2^32 */
-}
-
-/* generates a random number on [0,1) with 53-bit resolution*/
-double genrand_res53(void)
-{
- unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6;
- return(a*67108864.0+b)*(1.0/9007199254740992.0);
-}
-
-
-
-
-/*
- * NJ_genrand_int31_top() - Returns an int in the range 0..top
- *
- * This function attempts to remove bias in selecting random
- * integers in a range.
- *
- */
-long int
-NJ_genrand_int31_top(long int top) {
-
- long int overflow;
- long int r;
- long int retval;
-
- if(top <= 0) {
- return(0);
- } else {
- overflow = (NJ_RAND_MAX / top) * top;
- }
-
- while(1) {
- r = genrand_int31();
- if(r < overflow) {
- break;
- }
- }
-
- retval = r % top;
-
- return(retval);
-}
-
-
-
-
-
-
-
-
--- /dev/null
+/*
+ A C-program for MT19937, with initialization improved 2002/1/26.
+ Coded by Takuji Nishimura and Makoto Matsumoto.
+
+ Before using, initialize the state by using init_genrand(seed)
+ or init_by_array(init_key, key_length).
+
+ Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura,
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ 3. The names of its contributors may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+ Any feedback is very welcome.
+ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html
+ email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space)
+*/
+
+#include <stdio.h>
+
+#include "prng.h"
+
+/* Period parameters */
+#define N 624
+#define M 397
+#define MATRIX_A 0x9908b0dfUL /* constant vector a */
+#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
+#define LOWER_MASK 0x7fffffffUL /* least significant r bits */
+
+#define NJ_RAND_MAX 0x7fffffffUL
+
+
+static unsigned long mt[N]; /* the array for the state vector */
+static int mti=N+1; /* mti==N+1 means mt[N] is not initialized */
+
+/* initializes mt[N] with a seed */
+void init_genrand(unsigned long s)
+{
+ mt[0]= s & 0xffffffffUL;
+ for (mti=1; mti<N; mti++) {
+ mt[mti] =
+ (1812433253UL * (mt[mti-1] ^ (mt[mti-1] >> 30)) + mti);
+ /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
+ /* In the previous versions, MSBs of the seed affect */
+ /* only MSBs of the array mt[]. */
+ /* 2002/01/09 modified by Makoto Matsumoto */
+ mt[mti] &= 0xffffffffUL;
+ /* for >32 bit machines */
+ }
+}
+
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* slight change for C++, 2004/2/26 */
+void init_by_array(unsigned long init_key[], int key_length)
+{
+ int i, j, k;
+ init_genrand(19650218UL);
+ i=1; j=0;
+ k = (N>key_length ? N : key_length);
+ for (; k; k--) {
+ mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525UL))
+ + init_key[j] + j; /* non linear */
+ mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+ i++; j++;
+ if (i>=N) { mt[0] = mt[N-1]; i=1; }
+ if (j>=key_length) j=0;
+ }
+ for (k=N-1; k; k--) {
+ mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941UL))
+ - i; /* non linear */
+ mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+ i++;
+ if (i>=N) { mt[0] = mt[N-1]; i=1; }
+ }
+
+ mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */
+}
+
+/* generates a random number on [0,0xffffffff]-interval */
+unsigned long genrand_int32(void)
+{
+ unsigned long y;
+ static unsigned long mag01[2]={0x0UL, MATRIX_A};
+ /* mag01[x] = x * MATRIX_A for x=0,1 */
+
+ if (mti >= N) { /* generate N words at one time */
+ int kk;
+
+ if (mti == N+1) /* if init_genrand() has not been called, */
+ init_genrand(5489UL); /* a default initial seed is used */
+
+ for (kk=0;kk<N-M;kk++) {
+ y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+ mt[kk] = mt[kk+M] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+ for (;kk<N-1;kk++) {
+ y = (mt[kk]&UPPER_MASK)|(mt[kk+1]&LOWER_MASK);
+ mt[kk] = mt[kk+(M-N)] ^ (y >> 1) ^ mag01[y & 0x1UL];
+ }
+ y = (mt[N-1]&UPPER_MASK)|(mt[0]&LOWER_MASK);
+ mt[N-1] = mt[M-1] ^ (y >> 1) ^ mag01[y & 0x1UL];
+
+ mti = 0;
+ }
+
+ y = mt[mti++];
+
+ /* Tempering */
+ y ^= (y >> 11);
+ y ^= (y << 7) & 0x9d2c5680UL;
+ y ^= (y << 15) & 0xefc60000UL;
+ y ^= (y >> 18);
+
+ return y;
+}
+
+/* generates a random number on [0,0x7fffffff]-interval */
+long int genrand_int31(void)
+{
+ return (long)(genrand_int32()>>1);
+}
+
+/* These real versions are due to Isaku Wada, 2002/01/09 added */
+
+/* generates a random number on [0,1]-real-interval */
+double genrand_real1(void)
+{
+ return genrand_int32()*(1.0/4294967295.0);
+ /* divided by 2^32-1 */
+}
+
+/* generates a random number on [0,1)-real-interval */
+double genrand_real2(void)
+{
+ return genrand_int32()*(1.0/4294967296.0);
+ /* divided by 2^32 */
+}
+
+/* generates a random number on (0,1)-real-interval */
+double genrand_real3(void)
+{
+ return (((double)genrand_int32()) + 0.5)*(1.0/4294967296.0);
+ /* divided by 2^32 */
+}
+
+/* generates a random number on [0,1) with 53-bit resolution*/
+double genrand_res53(void)
+{
+ unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6;
+ return(a*67108864.0+b)*(1.0/9007199254740992.0);
+}
+
+
+
+
+/*
+ * NJ_genrand_int31_top() - Returns an int in the range 0..top
+ *
+ * This function attempts to remove bias in selecting random
+ * integers in a range.
+ *
+ */
+long int
+NJ_genrand_int31_top(long int top) {
+
+ long int overflow;
+ long int r;
+ long int retval;
+
+ if(top <= 0) {
+ return(0);
+ } else {
+ overflow = (NJ_RAND_MAX / top) * top;
+ }
+
+ while(1) {
+ r = genrand_int31();
+ if(r < overflow) {
+ break;
+ }
+ }
+
+ retval = r % top;
+
+ return(retval);
+}
+
+
+
+
+
+
+
+
outputNames.push_back(goodNameFile);
ofstream goodNameOut; m->openOutputFile(goodNameFile, goodNameOut);
-
+
while(!inputNames.eof()){
if (m->control_pressed) { goodNameOut.close(); inputNames.close(); remove(goodNameFile.c_str()); return 0; }
}
inputNames.close();
goodNameOut.close();
-
+
//we were unable to remove some of the bad sequences
if (badSeqNames.size() != 0) {
for (it = badSeqNames.begin(); it != badSeqNames.end(); it++) {
class SharedAce : public Calculator {
public:
- SharedAce(int n=10) : abund(n), Calculator("sharedace", 3, false) {};
+ SharedAce(int n=10) : abund(n), Calculator("sharedace", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class SharedChao1 : public Calculator {
public:
- SharedChao1() : Calculator("sharedchao", 3, true) {};
+ SharedChao1() : Calculator("sharedchao", 1, true) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class JAbund : public Calculator {
public:
- JAbund() : Calculator("jabund", 3, false) {};
+ JAbund() : Calculator("jabund", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class Jclass : public Calculator {
public:
- Jclass() : Calculator("jclass", 3, false) {};
+ Jclass() : Calculator("jclass", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class Jest : public Calculator {
public:
- Jest() : Calculator("jest", 3, false) {};
+ Jest() : Calculator("jest", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class SorAbund : public Calculator {
public:
- SorAbund() : Calculator("sorabund", 3, false) {};
+ SorAbund() : Calculator("sorabund", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class SorClass : public Calculator {
public:
- SorClass() : Calculator("sorclass", 3, false) {};
+ SorClass() : Calculator("sorclass", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class SorEst : public Calculator {
public:
- SorEst() : Calculator("sorest", 3, false) {};
+ SorEst() : Calculator("sorest", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
class ThetaN : public Calculator {
public:
- ThetaN() : Calculator("thetan", 3, false) {};
+ ThetaN() : Calculator("thetan", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);
private:
outputFileHandle << "label" <<'\t' << "comparison" << '\t';
for(int i=0;i<sumCalculators.size();i++){
outputFileHandle << '\t' << sumCalculators[i]->getName();
+ if (sumCalculators[i]->getCols() == 3) { outputFileHandle << "\tlci\thci"; }
}
outputFileHandle << endl;
class Whittaker : public Calculator {
public:
- Whittaker() : Calculator("whittaker", 3, false) {};
+ Whittaker() : Calculator("whittaker", 1, false) {};
EstOutput getValues(SAbundVector*) {return data;};
EstOutput getValues(vector<SharedRAbundVector*>);