6 *****************************************************************************
8 * Copyright (c) 2004, Luke Sheneman
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * + Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * + Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
21 * + The names of its contributors may not be used to endorse or promote
22 * products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
37 *****************************************************************************
42 * sheneman@cs.uidaho.edu
55 #include "getopt_long.h"
/*
 * NJ_handle_args() -
 *
 * Parse the command line into the function-static NJ_ARGS record and
 * check the resulting combination of options for conflicts.
 *
 *   argc  number of command-line arguments, handed to getopt_long()
 *   argv  the argument vector; argv[optind] is quoted in the
 *         "Unknown command-line argument" diagnostic
 *
 * Order of work: reset the options to their defaults, run getopt_long()
 * over the short-option string and NJ_long_options[], then apply the
 * consistency checks below.  Every failed check writes a "Clearcut: ..."
 * message to stderr.
 */
68 NJ_handle_args(int argc,
/* static storage: the flag-style long options below hold pointers to its fields */
71 static NJ_ARGS nj_args;
74 struct option NJ_long_options[] = {
76 /* These options don't set a flag */
/* (flag pointer is NULL, so getopt_long() returns the short-option character in the last column) */
77 {"in", required_argument, NULL, 'i'},
78 {"out", required_argument, NULL, 'o'},
79 {"seed", required_argument, NULL, 's'},
80 {"matrixout", required_argument, NULL, 'm'},
81 {"ntrees", required_argument, NULL, 'n'},
83 /* These options set a flag */
/* (getopt_long() stores the last column into the pointed-to nj_args field by itself) */
84 {"verbose", no_argument, &(nj_args.verbose_flag), 1},
85 {"quiet", no_argument, &(nj_args.quiet_flag), 1},
86 {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE},
87 {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES},
88 {"help", no_argument, &(nj_args.help), 1},
89 {"version", no_argument, &(nj_args.version), 1},
90 {"norandom", no_argument, &(nj_args.norandom), 1},
91 {"shuffle", no_argument, &(nj_args.shuffle), 1},
92 {"stdin", no_argument, &(nj_args.stdin_flag), 1},
93 {"stdout", no_argument, &(nj_args.stdout_flag), 1},
/* "--dna" and "--DNA" are two spellings of the same flag */
94 {"dna", no_argument, &(nj_args.dna_flag), 1},
95 {"DNA", no_argument, &(nj_args.dna_flag), 1},
96 {"protein", no_argument, &(nj_args.protein_flag), 1},
97 {"neighbor", no_argument, &(nj_args.neighbor), 1},
98 {"expblen", no_argument, &(nj_args.expblen), 1},
99 {"expdist", no_argument, &(nj_args.expdist), 1},
101 {"jukes", no_argument, &(nj_args.jukes_flag), 1},
102 {"kimura", no_argument, &(nj_args.kimura_flag), 1},
108 /* initializes options to their default */
109 nj_args.infilename = NULL;
110 nj_args.outfilename = NULL;
111 nj_args.matrixout = NULL;
/* clock-based seed; replaced below when a seed argument is supplied */
112 nj_args.seed = time(0);
113 nj_args.verbose_flag = 0;
114 nj_args.quiet_flag = 0;
/* distance-matrix input is the default mode */
115 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
118 nj_args.norandom = 0;
120 nj_args.stdin_flag = 0;
121 nj_args.stdout_flag = 0;
122 nj_args.dna_flag = 0;
123 nj_args.protein_flag = 0;
124 nj_args.correction_model = NJ_MODEL_NONE;
125 nj_args.jukes_flag = 0;
126 nj_args.kimura_flag = 0;
127 nj_args.neighbor = 0;
/* in the option string, a letter followed by ':' takes an argument (i, o, s, m, n) */
134 c = getopt_long(argc,
136 "i:o:s:m:n:vqduahVSIOrDPjkNeE",
/* long-option path: NJ_long_options[option_index] is the table entry that matched */
146 if(NJ_long_options[option_index].flag) {
150 printf("option %s", NJ_long_options[option_index].name);
152 printf(" with arg %s", optarg);
/*
 * Short-option handlers.  The three filename fields keep the optarg
 * pointer itself (no copy is made), so they alias the argv storage.
 */
158 nj_args.infilename = optarg;
162 nj_args.outfilename = optarg;
/*
 * NOTE(review): atoi() has no error reporting; text that does not start
 * with a number yields 0.  For ntrees that is caught by the "<= 0" check
 * further down, for seed it silently selects seed 0 -- consider strtol().
 */
166 nj_args.seed = atoi(optarg);
170 nj_args.matrixout = optarg;
174 nj_args.ntrees = atoi(optarg);
178 nj_args.verbose_flag = 1;
182 nj_args.quiet_flag = 1;
186 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
190 nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES;
/* -I handler: take the input from stdin */
206 nj_args.stdin_flag = 1;
/*
 * -O handler ('O' follows 'I' in the option string): send the output to
 * stdout.  This must raise stdout_flag, the same field --stdout sets;
 * raising stdin_flag here would leave -O without any effect on output.
 */
210 nj_args.stdout_flag = 1;
214 nj_args.norandom = 1;
218 nj_args.dna_flag = 1;
222 nj_args.protein_flag = 1;
226 nj_args.jukes_flag = 1;
230 nj_args.kimura_flag = 1;
234 nj_args.neighbor = 1;
/* optind is getopt's index of the first argv element it did not consume */
252 fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]);
/* version banner goes to stdout, unlike the diagnostics */
257 if(nj_args.version) {
258 printf("Clearcut Version: %s\n", NJ_VERSION);
267 /* if stdin & explicit filename are specified for input */
268 if(nj_args.stdin_flag) {
269 if(nj_args.infilename) {
270 fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n");
276 /* if stdout & explicit filename are specified for output */
277 if(nj_args.stdout_flag) {
278 if(nj_args.outfilename) {
279 fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n");
285 /* if user did not specify stdin or filename, default to stdin */
286 if(!nj_args.stdin_flag) {
287 if(!nj_args.infilename) {
289 fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n");
290 nj_args.stdin_flag = 1;
294 /* if user did not specify stdout or filename, default to stdout */
295 if(!nj_args.stdout_flag) {
296 if(!nj_args.outfilename) {
298 fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n");
299 nj_args.stdout_flag = 1;
303 /* User must specify distance matrix or alignment */
/*
 * NOTE(review): input_mode defaults to NJ_INPUT_MODE_DISTANCE and no
 * visible assignment stores NJ_INPUT_MODE_UNKNOWN, so this check looks
 * unreachable from the code shown -- confirm.
 */
304 if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) {
305 fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n");
310 /* do not allow protein or DNA options for distance matrix input */
311 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
312 if(nj_args.dna_flag || nj_args.protein_flag) {
313 fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n");
319 /* make sure different filenames were specified for input and output */
/*
 * The two defaulting steps above raise stdin_flag / stdout_flag whenever
 * the matching filename is NULL, so with both flags clear both pointers
 * handed to strcmp() are non-NULL.
 */
320 if(!nj_args.stdin_flag && !nj_args.stdout_flag) {
322 if(!strcmp(nj_args.infilename, nj_args.outfilename)) {
323 fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n");
329 /* make sure that user specifies DNA or Protein if dealing with alignment input */
330 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
331 if(!nj_args.dna_flag && !nj_args.protein_flag) {
332 fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n");
338 /* make sure that user does not specify both protein and DNA when dealing with alignment input */
339 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
340 if(nj_args.dna_flag && nj_args.protein_flag) {
341 fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n");
347 /* make sure verbose and quiet were not specified together */
348 if(nj_args.verbose_flag && nj_args.quiet_flag) {
349 fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n");
354 /* make sure that a correction model was specified only when providing an alignment */
355 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
356 if(nj_args.jukes_flag || nj_args.kimura_flag) {
357 fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n");
362 if(nj_args.jukes_flag && nj_args.kimura_flag) {
363 fprintf(stderr, "Clearcut: Only specify one correction model\n");
/* fold the two model flags into the single correction_model setting */
367 if(nj_args.jukes_flag && !nj_args.kimura_flag) {
368 nj_args.correction_model = NJ_MODEL_JUKES;
369 } else if(nj_args.kimura_flag && !nj_args.jukes_flag) {
370 nj_args.correction_model = NJ_MODEL_KIMURA;
372 nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */
377 /* make sure that the number of output trees is reasonable */
378 if(nj_args.ntrees <= 0) {
379 fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n");
385 * make sure that if exponential distances are specified,
386 * we are dealing with alignment input
388 if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
389 fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n");
/*
 * NJ_print_args() -
 *
 * Print a summary of the parsed command-line settings to stdout: input
 * mode, input and output source, alignment state (for non-distance
 * input), verbose/quiet state and the random seed.
 *
 *   nj_args  the settings to report
 */
406 NJ_print_args(NJ_ARGS *nj_args) {
/*
 * Turn the input-mode constant into a printable label.
 * NOTE(review): the declaration of input_mode is not visible here; the
 * longest label written by sprintf() is "Unaligned Sequences" (19
 * characters plus the terminator), so the buffer must hold at least 20
 * bytes -- confirm.
 */
410 switch (nj_args->input_mode) {
411 case NJ_INPUT_MODE_DISTANCE:
412 sprintf(input_mode, "Distance Matrix");
414 case NJ_INPUT_MODE_UNALIGNED_SEQUENCES:
415 sprintf(input_mode, "Unaligned Sequences");
417 case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
418 sprintf(input_mode, "Aligned Sequences");
421 sprintf(input_mode, "UNKNOWN");
425 printf("\n*** Command Line Arguments ***\n");
427 printf("Input Mode: %s\n", input_mode);
/* input source: stdin, or the named file */
429 if(nj_args->stdin_flag) {
430 printf("Input from STDIN\n");
432 printf("Input File: %s\n", nj_args->infilename);
/* output destination: stdout, or the named file */
435 if(nj_args->stdout_flag) {
436 printf("Output to STDOUT\n");
438 printf("Output File: %s\n", nj_args->outfilename);
/*
 * Alignment state is only reported for sequence input.
 * NOTE(review): aligned_flag is not assigned anywhere in the visible part
 * of this file -- confirm where it is set.
 */
441 if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) {
442 if(nj_args->aligned_flag) {
443 printf("Input Sequences Aligned: YES\n");
445 printf("Input Sequences Aligned: NO\n");
449 if(nj_args->verbose_flag) {
450 printf("Verbose Mode: ON\n");
452 printf("Verbose Mode: OFF\n");
455 if(nj_args->quiet_flag) {
456 printf("Quiet Mode: ON\n");
458 printf("Quiet Mode: OFF\n");
/* seed is printed with %d, i.e. it is treated as an int here */
462 printf("Random Seed: %d\n", nj_args->seed);
465 printf("\n*******\n");
476 * Print a usage message
/*
 * Usage text, written to stdout one line per printf() call: synopsis,
 * then general, input, correction-model and output options, then two
 * example invocations.
 */
482 printf("Usage: clearcut --in=<infilename> --out=<outfilename> [options]...\n");
483 printf("GENERAL OPTIONS:\n");
484 printf(" -h, --help Display this information.\n");
485 printf(" -V, --version Print the version of this program.\n");
486 printf(" -v, --verbose More output. (Default: OFF)\n");
/*
 * quiet_flag is initialized to 0 by NJ_handle_args() and may not be
 * combined with verbose, so quiet mode is off unless requested.
 */
487 printf(" -q, --quiet Silent operation. (Default: OFF)\n");
488 printf(" -s, --seed=<seed> Explicitly set the PRNG seed to a specific value.\n");
489 printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n");
490 printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n");
491 printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n");
494 printf("INPUT OPTIONS:\n");
495 printf(" -I, --stdin Read input from STDIN.\n");
496 printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n");
497 printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n");
498 printf(" -D, --DNA Input alignment are DNA sequences.\n");
499 printf(" -P, --protein Input alignment are protein sequences.\n");
502 printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n");
503 printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n");
504 printf(" -k, --kimura Use Kimura correction for distance matrix.\n");
507 printf("OUTPUT OPTIONS:\n");
508 printf(" -O, --stdout Output tree to STDOUT.\n");
509 printf(" -m, --matrixout=<file> Output distance matrix to specified file.\n");
510 printf(" -n, --ntrees=<n> Output n trees. (Default: 1)\n");
511 printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n");
512 printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n");
515 printf("EXAMPLES:\n");
516 printf(" Compute tree by supplying distance matrix via stdin:\n");
517 printf(" clearcut --distance < distances.txt > treefile.tre\n");
519 printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n");
520 printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n");