help.h

   1 //uchime by Robert C. Edgar http://drive5.com/uchime This code is donated to the public domain.
   2
   3 "\n"
   4 "Usage\n"
   5 "-----\n"
   6 "\n"
   7 "uchime --input query.fasta [--db db.fasta] [--uchimeout results.uchime]\n"
   8 "    [--uchimealns results.alns]\n"
   9 "\n"
  10 "Options\n"
  11 "-------\n"
  12 "\n"
  13 "--input filename\n"
  14 "    Query sequences in FASTA format.\n"
  15 "    If the --db option is not specified, uchime uses de novo\n"
  16 "    detection. In de novo mode, relative abundance must be given\n"
  17 "    by a string /ab=xxx/ somewhere in the label, where xxx is a\n"
  18 "    floating-point number, e.g. >F00QGH67HG/ab=1.2/.\n"
  19 "\n"
  20 "--db filename\n"
  21 "    Reference database in FASTA format.\n"
  22 "    Optional, if not specified uchime uses de novo mode.\n"
  23 "\n"
  24 "    ***WARNING*** The database is searched ONLY on the plus strand.\n"
  25 "    You MUST include reverse-complemented sequences in the database\n"
  26 "    if you want both strands to be searched.\n"
  27 "\n"
  28 "--abskew x\n"
  29 "    Minimum abundance skew. Default 1.9. De novo mode only.\n"
  30 "    Abundance skew is:\n"
  31 "        min [ abund(parent1), abund(parent2) ] / abund(query).\n"
  32 "\n"
  33 "--uchimeout filename\n"
  34 "    Output in tabbed format with one record per query sequence.\n"
  35 "    First field is score (h), second field is query label.\n"
  36 "    For details, see manual.\n"
  37 "\n"
  38 "--uchimealns filename\n"
  39 "    Multiple alignments of query sequences to parents in human-\n"
  40 "    readable format. Alignments show columns with differences\n"
  41 "    that support or contradict a chimeric model.\n"
  42 "\n"
  43 "--minh h\n"
  44 "    Minimum score to report chimera. Default 0.3. Values from 0.1\n"
  45 "    to 5 might be reasonable. Lower values increase sensitivity\n"
  46 "    but may report more false positives. If you decrease --xn,\n"
  47 "    you may need to increase --minh, and vice versa.\n"
  48 "\n"
  49 "--mindiv div\n"
  50 "    Minimum divergence ratio, default 0.5. Div ratio is 100%% - \n"
  51 "    %%identity between query sequence and the closest candidate for\n"
  52 "    being a parent. If you don't care about very close chimeras,\n"
  53 "    then you could increase --mindiv to, say, 1.0 or 2.0, and\n"
  54 "    also decrease --minh, say to 0.1, to increase sensitivity.\n"
  55 "    How well this works will depend on your data. Best is to\n"
  56 "    tune parameters on a good benchmark.\n"
  57 "\n"
  58 "--xn beta\n"
  59 "    Weight of a no vote, also called the beta parameter. Default 8.0.\n"
  60 "    Decreasing this weight to around 3 or 4 may give better\n"
  61 "    performance on denoised data.\n"
  62 "\n"
  63 "--dn n\n"
  64 "    Pseudo-count prior on number of no votes. Default 1.4. Probably\n"
  65 "    no good reason to change this unless you can retune to a good\n"
  66 "    benchmark for your data. Reasonable values are probably in the\n"
  67 "    range from 0.2 to 2.\n"
  68 "\n"
  69 "--xa w\n"
  70 "    Weight of an abstain vote. Default 1. So far, results do not\n"
  71 "    seem to be very sensitive to this parameter, but if you have\n"
  72 "    a good training set it might be worth trying. Reasonable values\n"
  73 "    might range from 0.1 to 2.\n"
  74 "\n"
  75 "--chunks n\n"
  76 "    Number of chunks to extract from the query sequence when searching\n"
  77 "    for parents. Default 4.\n"
  78 "\n"
  79 "--[no]ovchunks\n"
  80 "    [Do not] use overlapping chunks. Default do not.\n"
  81 "\n"
  82 "--minchunk n\n"
  83 "    Minimum length of a chunk. Default 64.\n"
  84 "\n"
  85 "--idsmoothwindow w\n"
  86 "    Length of id smoothing window. Default 32.\n"
  87 "\n"
  88 "--minsmoothid f\n"
  89 "    Minimum fractional identity over smoothed window of candidate parent.\n"
  90 "    Default 0.95.\n"
  91 "\n"
  92 "--maxp n\n"
  93 "    Maximum number of candidate parents to consider. Default 2. In tests so\n"
  94 "    far, increasing --maxp gives only a very small improvement in sensitivity\n"
  95 "    but tends to increase the error rate quite a bit.\n"
  96 "\n"
  97 "--[no]skipgaps\n"
  98 "--[no]skipgaps2\n"
  99 "    These options control how gapped columns affect counting of diffs.\n"
 100 "    If --skipgaps is specified, columns containing gaps do not count as diffs.\n"
 101 "    If --skipgaps2 is specified, a column immediately adjacent to\n"
 102 "    a column containing a gap is not counted as a diff.\n"
 103 "    Default is --skipgaps --skipgaps2.\n"
 104 "\n"
 105 "--minlen L\n"
 106 "--maxlen L\n"
 107 "    Minimum and maximum sequence length. Defaults 10, 10000.\n"
 108 "    Applies to both query and reference sequences.\n"
 109 "\n"
 110 "--ucl\n"
 111 "    Use local-X alignments. Default is global-X. On tests so far, global-X\n"
 112 "    is always better; this option is retained because it just might work\n"
 113 "    well on some future type of data.\n"
 114 "\n"
 115 "--queryfract f\n"
 116 "    Minimum fraction of the query sequence that must be covered by a local-X\n"
 117 "    alignment. Default 0.5. Applies only when --ucl is specified.\n"
 118 "\n"
 119 "--quiet\n"
 120 "    Do not display progress messages on stderr.\n"
 121 "\n"
 122 "--log filename\n"
 123 "    Write miscellaneous information to the log file. Mostly of interest\n"
 124 "    to me (the algorithm developer). Use --verbose to get more info.\n"
 125 "\n"
 126 "--self\n"
 127 "    In reference database mode, exclude a reference sequence if it has\n"
 128 "    the same label as the query. This is useful for benchmarking by using\n"
 129 "    the ref db as a query to test for false positives.\n"