6 *****************************************************************************
8 * Copyright (c) 2004, Luke Sheneman
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
15 * + Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * + Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in
19 * the documentation and/or other materials provided with the
21 * + The names of its contributors may not be used to endorse or promote
22 * products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
37 *****************************************************************************
42 * sheneman@cs.uidaho.edu
55 #include "getopt_long.h"
/*
 * NJ_handle_args() - parse the command line into a function-static NJ_ARGS.
 *
 * Every field listed under "initializes options to their default" is reset on
 * each call and optind is rewound, so the parser can be run more than once in
 * the same process.  Long options that carry a flag pointer write straight
 * into nj_args through getopt_long(); the remaining options are handled one
 * by one below.  After parsing, the combination of options is validated
 * (input/output source, input type, sequence type, verbosity, correction
 * model, tree count, exponential distance output).
 *
 * argc, argv: the program's argument vector, passed on to getopt_long().
 *
 * NOTE(review): confirm what callers receive when one of the validation
 * checks below fails.
 */
68 NJ_handle_args(int argc,
71 static NJ_ARGS nj_args;
74 optind = 0; //necessary to read in arguments if code is run more than once
76 struct option NJ_long_options[] = {
78 /* These options don't set a flag */
79 {"in", required_argument, NULL, 'i'},
80 {"out", required_argument, NULL, 'o'},
81 {"seed", required_argument, NULL, 's'},
82 {"matrixout", required_argument, NULL, 'm'},
83 {"ntrees", required_argument, NULL, 'n'},
85 /* These options set a flag */
86 {"verbose", no_argument, &(nj_args.verbose_flag), 1},
87 {"quiet", no_argument, &(nj_args.quiet_flag), 1},
88 {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE},
89 {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES},
90 {"help", no_argument, &(nj_args.help), 1},
91 {"version", no_argument, &(nj_args.version), 1},
92 {"norandom", no_argument, &(nj_args.norandom), 1},
93 {"shuffle", no_argument, &(nj_args.shuffle), 1},
94 {"stdin", no_argument, &(nj_args.stdin_flag), 1},
95 {"stdout", no_argument, &(nj_args.stdout_flag), 1},
96 {"dna", no_argument, &(nj_args.dna_flag), 1},
97 {"DNA", no_argument, &(nj_args.dna_flag), 1},
98 {"protein", no_argument, &(nj_args.protein_flag), 1},
99 {"neighbor", no_argument, &(nj_args.neighbor), 1},
100 {"expblen", no_argument, &(nj_args.expblen), 1},
101 {"expdist", no_argument, &(nj_args.expdist), 1},
103 {"jukes", no_argument, &(nj_args.jukes_flag), 1},
104 {"kimura", no_argument, &(nj_args.kimura_flag), 1},
110 /* initializes options to their default */
111 nj_args.infilename = NULL;
112 nj_args.outfilename = NULL;
113 nj_args.matrixout = NULL;
114 nj_args.seed = time(0);
115 nj_args.verbose_flag = 0;
116 nj_args.quiet_flag = 0;
117 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
120 nj_args.norandom = 0;
122 nj_args.stdin_flag = 0;
123 nj_args.stdout_flag = 0;
124 nj_args.dna_flag = 0;
125 nj_args.protein_flag = 0;
126 nj_args.correction_model = NJ_MODEL_NONE;
127 nj_args.jukes_flag = 0;
128 nj_args.kimura_flag = 0;
129 nj_args.neighbor = 0;
136 c = getopt_long(argc,
138 "i:o:s:m:n:vqduahVSIOrDPjkNeE",
144 //printf("%d\t%d\n", option_index, argc);
145 //for (int red = 0; red < argc; red++) { printf("%s\n", argv[red]); }
/* a long option that carries a flag pointer has already stored its value */
149 if(NJ_long_options[option_index].flag) {
153 printf("option %s", NJ_long_options[option_index].name);
155 printf(" with arg %s", optarg);
/* the filename fields keep pointing at optarg, i.e. into the caller's argv */
161 nj_args.infilename = optarg;
165 nj_args.outfilename = optarg;
/* atoi() gives no error indication: non-numeric text becomes seed 0 */
169 nj_args.seed = atoi(optarg);
173 nj_args.matrixout = optarg;
/* a non-numeric count becomes 0 and is rejected by the ntrees check further down */
177 nj_args.ntrees = atoi(optarg);
181 nj_args.verbose_flag = 1;
185 nj_args.quiet_flag = 1;
189 nj_args.input_mode = NJ_INPUT_MODE_DISTANCE;
193 nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES;
209 nj_args.stdin_flag = 1;
/* stdout, not stdin: the preceding handler already sets stdin_flag, and nothing else here lets a short option select stdout */
213 nj_args.stdout_flag = 1;
217 nj_args.norandom = 1;
221 nj_args.dna_flag = 1;
225 nj_args.protein_flag = 1;
229 nj_args.jukes_flag = 1;
233 nj_args.kimura_flag = 1;
237 nj_args.neighbor = 1;
/* NOTE(review): argv[optind] may already index the argument after the offending one (or the terminating NULL) - confirm against the bundled getopt_long */
255 fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]);
260 if(nj_args.version) {
261 printf("Clearcut Version: %s\n", NJ_VERSION);
270 /* if stdin & explicit filename are specified for input */
271 if(nj_args.stdin_flag) {
272 if(nj_args.infilename) {
273 fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n");
279 /* if stdout & explicit filename are specified for output */
280 if(nj_args.stdout_flag) {
281 if(nj_args.outfilename) {
282 fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n");
288 /* if user did not specify stdin or filename, default to stdin */
289 if(!nj_args.stdin_flag) {
290 if(!nj_args.infilename) {
292 fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n");
293 nj_args.stdin_flag = 1;
297 /* if user did not specify stdout or filename, default to stdout */
298 if(!nj_args.stdout_flag) {
299 if(!nj_args.outfilename) {
301 fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n");
302 nj_args.stdout_flag = 1;
306 /* User must specify distance matrix or alignment */
307 if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) {
308 fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n");
313 /* do not allow protein or DNA options for distance matrix input */
314 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
315 if(nj_args.dna_flag || nj_args.protein_flag) {
316 fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n");
322 /* make sure different filenames were specified for input and output */
/* both names are non-NULL here: a missing name has already switched on the matching std flag above */
323 if(!nj_args.stdin_flag && !nj_args.stdout_flag) {
325 if(!strcmp(nj_args.infilename, nj_args.outfilename)) {
326 fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n");
332 /* make sure that user specifies DNA or Protein if dealing with alignment input */
333 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
334 if(!nj_args.dna_flag && !nj_args.protein_flag) {
335 fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n");
341 /* make sure that user does not specify both protein and DNA when dealing with alignment input */
342 if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
343 if(nj_args.dna_flag && nj_args.protein_flag) {
344 fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n");
350 /* make sure verbose and quiet were not specified together */
351 if(nj_args.verbose_flag && nj_args.quiet_flag) {
352 fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n");
357 /* make sure that a correction model was specified only when providing an alignment */
358 if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) {
359 if(nj_args.jukes_flag || nj_args.kimura_flag) {
360 fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n");
365 if(nj_args.jukes_flag && nj_args.kimura_flag) {
366 fprintf(stderr, "Clearcut: Only specify one correction model\n");
/* translate the two model flags into the single correction_model value */
370 if(nj_args.jukes_flag && !nj_args.kimura_flag) {
371 nj_args.correction_model = NJ_MODEL_JUKES;
372 } else if(nj_args.kimura_flag && !nj_args.jukes_flag) {
373 nj_args.correction_model = NJ_MODEL_KIMURA;
375 nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */
380 /* make sure that the number of output trees is reasonable */
381 if(nj_args.ntrees <= 0) {
382 fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n");
388 * make sure that if exponential distances are specified,
389 * we are dealing with alignment input
391 if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) {
392 fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n");
/*
 * NJ_print_args() - print the parsed command-line settings to stdout.
 *
 * nj_args: the argument structure to report on; it is only read.
 *
 * Prints the input mode, the input and output source (STDIN/STDOUT or the
 * stored filename), the alignment state for non-distance input, the
 * verbose/quiet switches and the random seed.
 */
409 NJ_print_args(NJ_ARGS *nj_args) {
/* turn the input_mode value into a printable label */
413 switch (nj_args->input_mode) {
414 case NJ_INPUT_MODE_DISTANCE:
415 sprintf(input_mode, "Distance Matrix");
417 case NJ_INPUT_MODE_UNALIGNED_SEQUENCES:
418 sprintf(input_mode, "Unaligned Sequences");
420 case NJ_INPUT_MODE_ALIGNED_SEQUENCES:
421 sprintf(input_mode, "Aligned Sequences");
424 sprintf(input_mode, "UNKNOWN");
428 printf("\n*** Command Line Arguments ***\n");
430 printf("Input Mode: %s\n", input_mode);
432 if(nj_args->stdin_flag) {
433 printf("Input from STDIN\n");
435 printf("Input File: %s\n", nj_args->infilename);
438 if(nj_args->stdout_flag) {
/* NOTE(review): the message says "from STDOUT"; "to STDOUT" is probably what was meant */
439 printf("Output from STDOUT\n");
441 printf("Output File: %s\n", nj_args->outfilename);
/* the alignment state is only reported when the input is not a distance matrix */
444 if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) {
445 if(nj_args->aligned_flag) {
446 printf("Input Sequences Aligned: YES\n");
448 printf("Input Sequences Aligned: NO\n");
452 if(nj_args->verbose_flag) {
453 printf("Verbose Mode: ON\n");
455 printf("Verbose Mode: OFF\n");
458 if(nj_args->quiet_flag) {
459 printf("Quiet Mode: ON\n");
461 printf("Quiet Mode: OFF\n");
465 printf("Random Seed: %d\n", nj_args->seed);
468 printf("\n*******\n");
479 * Print a usage message
/*
 * Usage text, written to stdout: the synopsis, then the general, input,
 * correction-model and output options, then two example invocations.
 *
 * NOTE(review): --quiet is described as "Default: ON" while the argument
 * parser initialises quiet_flag to 0 - confirm which one is intended.
 */
485 printf("Usage: clearcut --in=<infilename> --out=<outfilename> [options]...\n");
486 printf("GENERAL OPTIONS:\n");
487 printf(" -h, --help Display this information.\n");
488 printf(" -V, --version Print the version of this program.\n");
489 printf(" -v, --verbose More output. (Default: OFF)\n");
490 printf(" -q, --quiet Silent operation. (Default: ON)\n");
491 printf(" -s, --seed=<seed> Explicitly set the PRNG seed to a specific value.\n");
492 printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n");
493 printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n");
494 printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n");
497 printf("INPUT OPTIONS:\n");
498 printf(" -I, --stdin Read input from STDIN.\n");
499 printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n");
500 printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n");
501 printf(" -D, --DNA Input alignment are DNA sequences.\n");
502 printf(" -P, --protein Input alignment are protein sequences.\n");
505 printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n");
506 printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n");
507 printf(" -k, --kimura Use Kimura correction for distance matrix.\n");
510 printf("OUTPUT OPTIONS:\n");
511 printf(" -O, --stdout Output tree to STDOUT.\n");
512 printf(" -m, --matrixout=<file> Output distance matrix to specified file.\n");
513 printf(" -n, --ntrees=<n> Output n trees. (Default: 1)\n");
514 printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n");
515 printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n");
518 printf("EXAMPLES:\n");
519 printf(" Compute tree by supplying distance matrix via stdin:\n");
520 printf(" clearcut --distance < distances.txt > treefile.tre\n");
522 printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n");
523 printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n");