X-Git-Url: https://git.donarmstrong.com/?a=blobdiff_plain;f=cmdargs.cpp;fp=cmdargs.cpp;h=416ced88ddb1a0dc765b73a5c973570f7925709b;hb=1b4aa03040e61f4c2d47ce32cb12726dcdadb917;hp=0000000000000000000000000000000000000000;hpb=87537e4ee46cae9b7041ae0ed4dbbc229936d37c;p=mothur.git diff --git a/cmdargs.cpp b/cmdargs.cpp new file mode 100644 index 0000000..416ced8 --- /dev/null +++ b/cmdargs.cpp @@ -0,0 +1,526 @@ +/* + * cmdargs.c + * + * $Id$ + * + ***************************************************************************** + * + * Copyright (c) 2004, Luke Sheneman + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * + Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ***************************************************************************** + * + * AUTHOR: + * + * Luke Sheneman + * sheneman@cs.uidaho.edu + * + */ + +#include +#include +#include +#include + + +#ifdef USE_GNU +#include +#else +#include "getopt_long.h" +#endif /* USE_GNU*/ + + +#include "clearcut.h" +#include "cmdargs.h" + + +/* + * NJ_handle_args() - + * + */ +NJ_ARGS * +NJ_handle_args(int argc, + char *argv[]) { + + static NJ_ARGS nj_args; + int option_index, c; + + struct option NJ_long_options[] = { + + /* These options don't set a flag */ + {"in", required_argument, NULL, 'i'}, + {"out", required_argument, NULL, 'o'}, + {"seed", required_argument, NULL, 's'}, + {"matrixout", required_argument, NULL, 'm'}, + {"ntrees", required_argument, NULL, 'n'}, + + /* These options set a flag */ + {"verbose", no_argument, &(nj_args.verbose_flag), 1}, + {"quiet", no_argument, &(nj_args.quiet_flag), 1}, + {"distance", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_DISTANCE}, + {"alignment", no_argument, &(nj_args.input_mode), NJ_INPUT_MODE_ALIGNED_SEQUENCES}, + {"help", no_argument, &(nj_args.help), 1}, + {"version", no_argument, &(nj_args.version), 1}, + {"norandom", no_argument, &(nj_args.norandom), 1}, + {"shuffle", no_argument, &(nj_args.shuffle), 1}, + {"stdin", no_argument, &(nj_args.stdin_flag), 1}, + {"stdout", no_argument, &(nj_args.stdout_flag), 1}, + {"dna", no_argument, &(nj_args.dna_flag), 1}, + {"DNA", no_argument, &(nj_args.dna_flag), 1}, + {"protein", no_argument, &(nj_args.protein_flag), 1}, + {"neighbor", no_argument, &(nj_args.neighbor), 1}, + {"expblen", no_argument, &(nj_args.expblen), 1}, + {"expdist", no_argument, &(nj_args.expdist), 1}, + + {"jukes", no_argument, &(nj_args.jukes_flag), 1}, + {"kimura", no_argument, &(nj_args.kimura_flag), 1}, + + {0, 0, 0, 0} + + }; + + /* initializes options to their default */ + nj_args.infilename = NULL; + nj_args.outfilename = NULL; + nj_args.matrixout = NULL; + nj_args.seed = time(0); + nj_args.verbose_flag = 0; + nj_args.quiet_flag = 0; + nj_args.input_mode = NJ_INPUT_MODE_DISTANCE; + nj_args.help = 0; + nj_args.version = 0; + nj_args.norandom = 0; + nj_args.shuffle = 0; + nj_args.stdin_flag = 0; + nj_args.stdout_flag = 0; + nj_args.dna_flag = 0; + nj_args.protein_flag = 0; + nj_args.correction_model = NJ_MODEL_NONE; + nj_args.jukes_flag = 0; + nj_args.kimura_flag = 0; + nj_args.neighbor = 0; + nj_args.ntrees = 1; + nj_args.expblen = 0; + nj_args.expdist = 0; + + while(1) { + + c = getopt_long(argc, + argv, + "i:o:s:m:n:vqduahVSIOrDPjkNeE", + NJ_long_options, + &option_index); + if(c == -1) { + break; + } + + switch(c) { + + case 0: + if(NJ_long_options[option_index].flag) { + break; + } + + printf("option %s", NJ_long_options[option_index].name); + if(optarg) { + printf(" with arg %s", optarg); + } + printf("\n"); + break; + + case 'i': + nj_args.infilename = optarg; + break; + + case 'o': + nj_args.outfilename = optarg; + break; + + case 's': + nj_args.seed = atoi(optarg); + break; + + case 'm': + nj_args.matrixout = optarg; + break; + + case 'n': + nj_args.ntrees = atoi(optarg); + break; + + case 'v': + nj_args.verbose_flag = 1; + break; + + case 'q': + nj_args.quiet_flag = 1; + break; + + case 'd': + nj_args.input_mode = NJ_INPUT_MODE_DISTANCE; + break; + + case 'a': + nj_args.input_mode = NJ_INPUT_MODE_ALIGNED_SEQUENCES; + break; + + case 'h': + nj_args.help = 1; + break; + + case 'V': + nj_args.version = 1; + break; + + case 'S': + nj_args.shuffle = 1; + break; + + case 'I': + nj_args.stdin_flag = 1; + break; + + case 'O': + nj_args.stdin_flag = 1; + break; + + case 'r': + nj_args.norandom = 1; + break; + + case 'D': + nj_args.dna_flag = 1; + break; + + case 'P': + nj_args.protein_flag = 1; + break; + + case 'j': + nj_args.jukes_flag = 1; + break; + + case 'k': + nj_args.kimura_flag = 1; + break; + + case 'N': + nj_args.neighbor = 1; + break; + + case 'e': + nj_args.expblen = 1; + break; + + case 'E': + nj_args.expdist = 1; + break; + + default: + NJ_usage(); + exit(-1); + } + } + + if(optind < argc) { + fprintf(stderr, "Clearcut: Unknown command-line argument:\n --> %s\n", argv[optind]); + NJ_usage(); + exit(-1); + } + + if(nj_args.version) { + printf("Clearcut Version: %s\n", NJ_VERSION); + exit(0); + } + + if(nj_args.help) { + NJ_usage(); + exit(0); + } + + /* if stdin & explicit filename are specified for input */ + if(nj_args.stdin_flag) { + if(nj_args.infilename) { + fprintf(stderr, "Clearcut: Ambiguous input source specified. Specify input filename OR stdin.\n"); + NJ_usage(); + exit(-1); + } + } + + /* if stdout & explicit filename are specified for output */ + if(nj_args.stdout_flag) { + if(nj_args.outfilename) { + fprintf(stderr, "Clearcut: Ambiguous output specified. Specify output filename OR stdout.\n"); + NJ_usage(); + exit(-1); + } + } + + /* if user did not specify stdin or filename, default to stdin */ + if(!nj_args.stdin_flag) { + if(!nj_args.infilename) { + + fprintf(stderr, "Clearcut: No input file specified. Using stdin.\n"); + nj_args.stdin_flag = 1; + } + } + + /* if user did not specify stdout or filename, default to stdout */ + if(!nj_args.stdout_flag) { + if(!nj_args.outfilename) { + + fprintf(stderr, "Clearcut: No output file specified. Using stdout.\n"); + nj_args.stdout_flag = 1; + } + } + + /* User must specify distance matrix or alignment */ + if(nj_args.input_mode == NJ_INPUT_MODE_UNKNOWN) { + fprintf(stderr, "Clearcut: Must specify input type (--distance | --alignment)\n"); + NJ_usage(); + exit(-1); + } + + /* do not allow protein or DNA options for distance matrix input */ + if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) { + if(nj_args.dna_flag || nj_args.protein_flag) { + fprintf(stderr, "Clearcut: Ambiguous arguments. (--protein | --DNA) do not apply to distance \n"); + NJ_usage(); + exit(-1); + } + } + + /* make sure different filenames were specified for input and output */ + if(!nj_args.stdin_flag && !nj_args.stdout_flag) { + + if(!strcmp(nj_args.infilename, nj_args.outfilename)) { + fprintf(stderr, "Clearcut: Input filename and output filename must be unique.\n"); + NJ_usage(); + exit(-1); + } + } + + /* make sure that user specifies DNA or Protein if dealing with alignment input */ + if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) { + if(!nj_args.dna_flag && !nj_args.protein_flag) { + fprintf(stderr, "Clearcut: Must specify protein or DNA for alignment input.\n"); + NJ_usage(); + exit(-1); + } + } + + /* make sure that user does not specify both protein and DNA when dealing with alignment input */ + if(nj_args.input_mode == NJ_INPUT_MODE_ALIGNED_SEQUENCES) { + if(nj_args.dna_flag && nj_args.protein_flag) { + fprintf(stderr, "Clearcut: Specifying protein and DNA sequences are mutually exclusive options\n"); + NJ_usage(); + exit(-1); + } + } + + /* make sure verbose and quiet were not specified together */ + if(nj_args.verbose_flag && nj_args.quiet_flag) { + fprintf(stderr, "Clearcut: Verbose and Quiet mode are mutually exclusive.\n"); + NJ_usage(); + exit(-1); + } + + /* make sure that a correction model was specified only when providing an alignment */ + if(nj_args.input_mode == NJ_INPUT_MODE_DISTANCE) { + if(nj_args.jukes_flag || nj_args.kimura_flag) { + fprintf(stderr, "Clearcut: Only specify correction model for alignment input.\n"); + NJ_usage(); + exit(-1); + } + } else { + if(nj_args.jukes_flag && nj_args.kimura_flag) { + fprintf(stderr, "Clearcut: Only specify one correction model\n"); + NJ_usage(); + exit(-1); + } else { + if(nj_args.jukes_flag && !nj_args.kimura_flag) { + nj_args.correction_model = NJ_MODEL_JUKES; + } else if(nj_args.kimura_flag && !nj_args.jukes_flag) { + nj_args.correction_model = NJ_MODEL_KIMURA; + } else { + nj_args.correction_model = NJ_MODEL_NONE; /* DEFAULT */ + } + } + } + + /* make sure that the number of output trees is reasonable */ + if(nj_args.ntrees <= 0) { + fprintf(stderr, "Clearcut: Number of output trees must be a positive integer.\n"); + NJ_usage(); + exit(-1); + } + + /* + * make sure that if exponential distances are specified, + * we are dealing with alignment input + */ + if(nj_args.expdist && nj_args.input_mode != NJ_INPUT_MODE_ALIGNED_SEQUENCES) { + fprintf(stderr, "Clearcut: Exponential notation for distance matrix output requires that input be an alignment\n"); + NJ_usage(); + exit(-1); + } + + return(&nj_args); +} + + + + + +/* + * NJ_print_args() - + * + */ +void +NJ_print_args(NJ_ARGS *nj_args) { + + char input_mode[32]; + + switch (nj_args->input_mode) { + case NJ_INPUT_MODE_DISTANCE: + sprintf(input_mode, "Distance Matrix"); + break; + case NJ_INPUT_MODE_UNALIGNED_SEQUENCES: + sprintf(input_mode, "Unaligned Sequences"); + break; + case NJ_INPUT_MODE_ALIGNED_SEQUENCES: + sprintf(input_mode, "Aligned Sequences"); + break; + default: + sprintf(input_mode, "UNKNOWN"); + break; + } + + printf("\n*** Command Line Arguments ***\n"); + + printf("Input Mode: %s\n", input_mode); + + if(nj_args->stdin_flag) { + printf("Input from STDIN\n"); + } else { + printf("Input File: %s\n", nj_args->infilename); + } + + if(nj_args->stdout_flag) { + printf("Output from STDOUT\n"); + } else { + printf("Output File: %s\n", nj_args->outfilename); + } + + if(nj_args->input_mode != NJ_INPUT_MODE_DISTANCE) { + if(nj_args->aligned_flag) { + printf("Input Sequences Aligned: YES\n"); + } else { + printf("Input Sequences Aligned: NO\n"); + } + } + + if(nj_args->verbose_flag) { + printf("Verbose Mode: ON\n"); + } else { + printf("Verbose Mode: OFF\n"); + } + + if(nj_args->quiet_flag) { + printf("Quiet Mode: ON\n"); + } else { + printf("Quiet Mode: OFF\n"); + } + + if(nj_args->seed) { + printf("Random Seed: %d\n", nj_args->seed); + } + + printf("\n*******\n"); + + return; +} + + + + +/* + * NJ_usage() - + * + * Print a usage message + * + */ +void +NJ_usage(void) { + + printf("Usage: clearcut --in= --out= [options]...\n"); + printf("GENERAL OPTIONS:\n"); + printf(" -h, --help Display this information.\n"); + printf(" -V, --version Print the version of this program.\n"); + printf(" -v, --verbose More output. (Default: OFF)\n"); + printf(" -q, --quiet Silent operation. (Default: ON)\n"); + printf(" -s, --seed= Explicitly set the PRNG seed to a specific value.\n"); + printf(" -r, --norandom Attempt joins deterministically. (Default: OFF)\n"); + printf(" -S, --shuffle Randomly shuffle the distance matrix. (Default: OFF)\n"); + printf(" -N, --neighbor Use traditional Neighbor-Joining algorithm. (Default: OFF)\n"); + + printf("\n"); + printf("INPUT OPTIONS:\n"); + printf(" -I, --stdin Read input from STDIN.\n"); + printf(" -d, --distance Input file is a distance matrix. (Default: ON)\n"); + printf(" -a, --alignment Input file is a set of aligned sequences. (Default: OFF)\n"); + printf(" -D, --DNA Input alignment are DNA sequences.\n"); + printf(" -P, --protein Input alignment are protein sequences.\n"); + + printf("\n"); + printf("CORRECTION MODEL FOR COMPUTING DISTANCE MATRIX (Default: NO Correction):\n"); + printf(" -j, --jukes Use Jukes-Cantor correction for computing distance matrix.\n"); + printf(" -k, --kimura Use Kimura correction for distance matrix.\n"); + + printf("\n"); + printf("OUTPUT OPTIONS:\n"); + printf(" -O, --stdout Output tree to STDOUT.\n"); + printf(" -m, --matrixout= Output distance matrix to specified file.\n"); + printf(" -n, --ntrees= Output n trees. (Default: 1)\n"); + printf(" -e, --expblen Exponential notation for branch lengths. (Default: OFF)\n"); + printf(" -E, --expdist Exponential notation in distance output. (Default: OFF)\n"); + + printf("\n"); + printf("EXAMPLES:\n"); + printf(" Compute tree by supplying distance matrix via stdin:\n"); + printf(" clearcut --distance < distances.txt > treefile.tre\n"); + printf("\n"); + printf(" Compute tree by supplying an alignment of DNA sequences from a file:\n"); + printf(" clearcut --alignment --DNA --in=alignment.txt --out=treefile.tre\n"); + + return; +} + + +