AOBJS= bam_tview.o bam_plcmd.o sam_view.o \
bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \
bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \
- cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o
+ cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o \
+ bam_tview_curses.o bam_tview_html.o
PROG= samtools
INCLUDES= -I.
SUBDIRS= . bcftools misc
$(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz -lpthread
razip:razip.o razf.o $(KNETFILE_O)
- $(CC) $(CFLAGS) -o $@ razf.o razip.o $(KNETFILE_O) -lz
+ $(CC) $(CFLAGS) -o $@ $^ -lz
bgzip:bgzip.o bgzf.o $(KNETFILE_O)
- $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(KNETFILE_O) -lz -lpthread
+ $(CC) $(CFLAGS) -o $@ $^ -lz -lpthread
bgzf.o:bgzf.c bgzf.h
$(CC) -c $(CFLAGS) $(DFLAGS) -DBGZF_CACHE $(INCLUDES) bgzf.c -o $@
bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h
bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h
bam_lpileup.o:bam.h ksort.h
-bam_tview.o:bam.h faidx.h
+bam_tview.o:bam.h faidx.h bam_tview.h
+bam_tview_curses.o:bam.h faidx.h bam_tview.h
+bam_tview_html.o:bam.h faidx.h bam_tview.h
bam_sort.o:bam.h ksort.h razf.h
bam_md.o:bam.h faidx.h
sam_header.o:sam_header.h khash.h
return ret;
}
+int read_file_list(const char *file_list,int *n,char **argv[]);
+
#ifdef _MAIN_BAM2DEPTH
int main(int argc, char *argv[])
#else
int main_depth(int argc, char *argv[])
#endif
{
- int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0;
+ int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles;
const bam_pileup1_t **plp;
char *reg = 0; // specified region
void *bed = 0; // BED data structure
+ char *file_list = NULL, **fn = NULL;
bam_header_t *h = 0; // BAM header of the 1st input
aux_t **data;
bam_mplp_t mplp;
// parse the command line
- while ((n = getopt(argc, argv, "r:b:q:Q:l:")) >= 0) {
+ while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) {
switch (n) {
case 'l': min_len = atoi(optarg); break; // minimum query length
case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header
case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now
case 'q': baseQ = atoi(optarg); break; // base quality threshold
case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold
+ case 'f': file_list = optarg; break;
}
}
- if (optind == argc) {
- fprintf(stderr, "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-l minQLen] [-b in.bed] <in1.bam> [...]\n");
+ if (optind == argc && !file_list) {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n");
+ fprintf(stderr, "Options:\n");
+ fprintf(stderr, " -b <bed> list of positions or regions\n");
+ fprintf(stderr, " -f <list> list of input BAM filenames, one per line [null]\n");
+ fprintf(stderr, " -l <int> minQLen\n");
+ fprintf(stderr, " -q <int> base quality threshold\n");
+ fprintf(stderr, " -Q <int> mapping quality threshold\n");
+ fprintf(stderr, " -r <chr:from-to> region\n");
+ fprintf(stderr, "\n");
return 1;
}
// initialize the auxiliary data structures
- n = argc - optind; // the number of BAMs on the command line
+ if (file_list)
+ {
+ if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
+ n = nfiles;
+ argv = fn;
+ optind = 0;
+ }
+ else
+ n = argc - optind; // the number of BAMs on the command line
data = calloc(n, sizeof(void*)); // data[i] for the i-th input
beg = 0; end = 1<<30; tid = -1; // set the default region
for (i = 0; i < n; ++i) {
}
free(data); free(reg);
if (bed) bed_destroy(bed);
+ if ( file_list )
+ {
+ for (i=0; i<n; i++) free(fn[i]);
+ free(fn);
+ }
return 0;
}
} else s[name_end] = ':', name_end = l;
}
} else iter = kh_get(s, h, str);
+ if (iter == kh_end(h)) {
+ free(s);
+ return -1;
+ }
*ref_id = kh_val(h, iter);
// parse the interval
if (name_end < l) {
strcat(strcpy(fnidx, fn), ".bai");
fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n");
download_from_remote(fnidx);
+ free(fnidx);
idx = bam_index_load_local(fn);
}
if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
if (fpidx == 0) {
fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
free(fnidx);
+ bam_index_destroy(idx);
return -1;
}
bam_index_save(idx, fpidx);
#include <ctype.h>
#include <string.h>
#include <errno.h>
+#include <sys/stat.h>
+#include <getopt.h>
#include "sam.h"
#include "faidx.h"
#include "kstring.h"
typedef struct {
int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag;
+ int rflag_require, rflag_filter;
int openQ, extQ, tandemQ, min_support; // for indels
double min_frac; // for indels
char *reg, *pl_list;
skip = 1;
continue;
}
+ if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; }
+ if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; }
if (ma->conf->bed) { // test overlap
skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b)));
if (skip) continue;
}
#define MAX_PATH_LEN 1024
-static int read_file_list(const char *file_list,int *n,char **argv[])
+int read_file_list(const char *file_list,int *n,char **argv[])
{
char buf[MAX_PATH_LEN];
- int len, nfiles;
- char **files;
+ int len, nfiles = 0;
+ char **files = NULL;
+ struct stat sb;
+
+ *n = 0;
+ *argv = NULL;
FILE *fh = fopen(file_list,"r");
if ( !fh )
return 1;
}
- // Speed is not an issue here, determine the number of files by reading the file twice
- nfiles = 0;
- while ( fgets(buf,MAX_PATH_LEN,fh) ) nfiles++;
-
- if ( fseek(fh, 0L, SEEK_SET) )
- {
- fprintf(stderr,"%s: %s\n", file_list,strerror(errno));
- return 1;
- }
-
files = calloc(nfiles,sizeof(char*));
nfiles = 0;
while ( fgets(buf,MAX_PATH_LEN,fh) )
{
+ // allow empty lines and trailing spaces
len = strlen(buf);
while ( len>0 && isspace(buf[len-1]) ) len--;
if ( !len ) continue;
- files[nfiles] = malloc(sizeof(char)*(len+1));
- strncpy(files[nfiles],buf,len);
- files[nfiles][len] = 0;
+ // check sanity of the file list
+ buf[len] = 0;
+ if (stat(buf, &sb) != 0)
+ {
+ // no such file, check if it is safe to print its name
+ int i, safe_to_print = 1;
+ for (i=0; i<len; i++)
+ if (!isprint(buf[i])) { safe_to_print = 0; break; }
+ if ( safe_to_print )
+ fprintf(stderr,"The file list \"%s\" appears broken, could not locate: %s\n", file_list,buf);
+ else
+ fprintf(stderr,"Does the file \"%s\" really contain a list of files and do all exist?\n", file_list);
+ return 1;
+ }
+
nfiles++;
+ files = realloc(files,nfiles*sizeof(char*));
+ files[nfiles-1] = strdup(buf);
}
fclose(fh);
if ( !nfiles )
mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
mplp.min_frac = 0.002; mplp.min_support = 1;
mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
- while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsV")) >= 0) {
+ static struct option lopts[] =
+ {
+ {"rf",1,0,1}, // require flag
+ {"ff",1,0,2}, // filter flag
+ {0,0,0,0}
+ };
+ while ((c = getopt_long(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsV1:2:",lopts,NULL)) >= 0) {
switch (c) {
+ case 1 : mplp.rflag_require = strtol(optarg,0,0); break;
+ case 2 : mplp.rflag_filter = strtol(optarg,0,0); break;
case 'f':
mplp.fai = fai_load(optarg);
if (mplp.fai == 0) return 1;
fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n");
fprintf(stderr, " -A count anomalous read pairs\n");
fprintf(stderr, " -B disable BAQ computation\n");
- fprintf(stderr, " -b FILE list of input BAM files [null]\n");
+ fprintf(stderr, " -b FILE list of input BAM filenames, one per line [null]\n");
fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n");
fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth);
fprintf(stderr, " -E recalculate extended BAQ on the fly thus ignoring existing BQs\n");
fprintf(stderr, " -R ignore RG tags\n");
fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq);
fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ);
+ fprintf(stderr, " --rf INT required flags: skip reads with mask bits unset []\n");
+ fprintf(stderr, " --ff INT filter flags: skip reads with mask bits set []\n");
fprintf(stderr, "\nOutput options:\n\n");
fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n");
fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n");
@param prefix prefix of the output and the temporary files; upon
sucessess, prefix.bam will be written.
@param max_mem approxiate maximum memory (very inaccurate)
+ @param full_path the given output path is the full path and not just the prefix
@discussion It may create multiple temporary subalignment files
and then merge them by calling bam_merge_core(). This function is
NOT thread safe.
*/
-void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level)
+void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int full_path)
{
int ret, i, n_files = 0;
size_t mem, max_k, k, max_mem;
bamFile fp;
bam1_t *b, **buf;
char *fnout = 0;
+ char const *suffix = ".bam";
+ if (full_path) suffix += 4;
if (n_threads < 2) n_threads = 1;
g_is_by_qname = is_by_qname;
// output file name
fnout = calloc(strlen(prefix) + 20, 1);
if (is_stdout) sprintf(fnout, "-");
- else sprintf(fnout, "%s.bam", prefix);
+ else sprintf(fnout, "%s%s", prefix, suffix);
// write the final output
if (n_files == 0) { // a single block
char mode[8];
fns = (char**)calloc(n_files, sizeof(char*));
for (i = 0; i < n_files; ++i) {
fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
- sprintf(fns[i], "%s.%.4d.bam", prefix, i);
+ sprintf(fns[i], "%s.%.4d%s", prefix, i, suffix);
}
bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level);
for (i = 0; i < n_files; ++i) {
void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem)
{
- bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1);
+ bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1, 0);
}
int bam_sort(int argc, char *argv[])
{
size_t max_mem = 768<<20; // 512MB
- int c, is_by_qname = 0, is_stdout = 0, n_threads = 0, level = -1;
- while ((c = getopt(argc, argv, "nom:@:l:")) >= 0) {
+ int c, is_by_qname = 0, is_stdout = 0, n_threads = 0, level = -1, full_path = 0;
+ while ((c = getopt(argc, argv, "fnom:@:l:")) >= 0) {
switch (c) {
+ case 'f': full_path = 1; break;
case 'o': is_stdout = 1; break;
case 'n': is_by_qname = 1; break;
case 'm': {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: samtools sort [options] <in.bam> <out.prefix>\n\n");
fprintf(stderr, "Options: -n sort by read name\n");
+ fprintf(stderr, " -f use <out.prefix> as full file name instead of prefix\n");
fprintf(stderr, " -o final output to stdout\n");
fprintf(stderr, " -l INT compression level, from 0 to 9 [-1]\n");
fprintf(stderr, " -@ INT number of sorting and compression threads [1]\n");
fprintf(stderr, "\n");
return 1;
}
- bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level);
+ bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level, full_path);
return 0;
}
-#undef _HAVE_CURSES
-
-#if _CURSES_LIB == 0
-#elif _CURSES_LIB == 1
-#include <curses.h>
-#ifndef NCURSES_VERSION
-#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled"
-#else
-#define _HAVE_CURSES
-#endif
-#elif _CURSES_LIB == 2
-#include <xcurses.h>
-#define _HAVE_CURSES
-#else
-#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled"
-#endif
-
-#ifdef _HAVE_CURSES
-#include <ctype.h>
#include <assert.h>
-#include <string.h>
-#include <math.h>
-#include <unistd.h>
-#include "bam.h"
-#include "faidx.h"
-#include "bam2bcf.h"
-#include "sam_header.h"
-#include "khash.h"
-
-KHASH_MAP_INIT_STR(kh_rg, const char *)
+#include "bam_tview.h"
-char bam_aux_getCEi(bam1_t *b, int i);
-char bam_aux_getCSi(bam1_t *b, int i);
-char bam_aux_getCQi(bam1_t *b, int i);
-
-#define TV_MIN_ALNROW 2
-#define TV_MAX_GOTO 40
-#define TV_LOW_MAPQ 10
+/*
+ * Initialise the back-end independent part of a tview_t.
+ *
+ * tv      view to fill in; must not be NULL
+ * fn      BAM file to display; must not be NULL
+ * fn_fa   optional reference FASTA, may be NULL
+ * samples optional sample or read-group name; when given, only read groups
+ *         whose ID or SM equals it are recorded in tv->rg_hash
+ *
+ * Returns 0. Terminates the process with EXIT_FAILURE when the BAM file, its
+ * header or its index cannot be read, or when `samples` matches no read group.
+ */
+int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples)
+    {
+    assert(tv!=NULL);
+    assert(fn!=NULL);
+    tv->mrow = 24; tv->mcol = 80;
+    tv->color_for = TV_COLOR_MAPQ;
+    tv->is_dot = 1;
+
+    tv->fp = bam_open(fn, "r");
+    if(tv->fp==0)
+        {
+        /* fn_fa is optional and may be NULL, so it must not be handed to %s */
+        fprintf(stderr,"Cannot open '%s'.\n", fn);
+        exit(EXIT_FAILURE);
+        }
+    bgzf_set_cache_size(tv->fp, 8 * 1024 *1024);
+    assert(tv->fp);
+
+    tv->header = bam_header_read(tv->fp);
+    if(tv->header==0)
+        {
+        fprintf(stderr,"Cannot read '%s'.\n", fn);
+        exit(EXIT_FAILURE);
+        }
+    tv->idx = bam_index_load(fn);
+    if (tv->idx == 0)
+        {
+        fprintf(stderr,"Cannot read index for '%s'.\n", fn);
+        exit(EXIT_FAILURE);
+        }
+    tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
+    if (fn_fa) tv->fai = fai_load(fn_fa);
+    tv->bca = bcf_call_init(0.83, 13);
+    tv->ins = 1;
-#define TV_COLOR_MAPQ 0
-#define TV_COLOR_BASEQ 1
-#define TV_COLOR_NUCL 2
-#define TV_COLOR_COL 3
-#define TV_COLOR_COLQ 4
+    if ( samples )
+        {
+        if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text);
+        void *iter = tv->header->dict;
+        const char *key, *val;
+        int n = 0;
+        tv->rg_hash = kh_init(kh_rg);
+        /* walk the @RG header lines; key is the ID tag, val the SM tag */
+        while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) )
+            {
+            if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) )
+                {
+                khiter_t k = kh_get(kh_rg, tv->rg_hash, key);
+                if ( k != kh_end(tv->rg_hash) ) continue; /* already recorded */
+                int ret;
+                k = kh_put(kh_rg, tv->rg_hash, key, &ret);
+                kh_value(tv->rg_hash, k) = val;
+                n++;
+                }
+            }
+        if ( !n )
+            {
+            fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples);
+            exit(EXIT_FAILURE);
+            }
+        }
-#define TV_BASE_NUCL 0
-#define TV_BASE_COLOR_SPACE 1
+    return 0;
+    }
-typedef struct {
- int mrow, mcol;
- WINDOW *wgoto, *whelp;
- bam_index_t *idx;
- bam_lplbuf_t *lplbuf;
- bam_header_t *header;
- bamFile fp;
- int curr_tid, left_pos;
- faidx_t *fai;
- bcf_callaux_t *bca;
+/*
+ * Release everything base_tv_init() attached to the view. The tview_t itself
+ * is not freed here; the caller owns it.
+ */
+void base_tv_destroy(tview_t* tv)
+    {
+    bam_lplbuf_destroy(tv->lplbuf);
+    bcf_call_destroy(tv->bca);
+    bam_index_destroy(tv->idx);
+    if (tv->fai) fai_destroy(tv->fai);
+    free(tv->ref);
+    /* rg_hash only exists when a sample filter was requested. Its keys and
+       values come from the header dictionary, so only the table is freed. */
+    if (tv->rg_hash) kh_destroy(kh_rg, tv->rg_hash);
+    bam_header_destroy(tv->header);
+    bam_close(tv->fp);
+    }
- int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name;
- char *ref;
- khash_t(kh_rg) *rg_hash;
-} tview_t;
int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
{
// print referece
rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
for (i = tv->last_pos + 1; i < pos; ++i) {
- if (i%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", i+1);
+ if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1);
c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
- mvaddch(1, tv->ccol++, c);
+ tv->my_mvaddch(tv,1, tv->ccol++, c);
}
- if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);
+ if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
{ // call consensus
bcf_callret1_t bcr;
int qsum[4], a1, a2, tmp;
else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
}
- attr = A_UNDERLINE;
+ attr = tv->my_underline(tv);
c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
i = (call&0xffff)/10+1;
if (i > 4) i = 4;
- attr |= COLOR_PAIR(i);
+ attr |= tv->my_colorpair(tv,i);
if (c == toupper(rb)) c = '.';
- attron(attr);
- mvaddch(2, tv->ccol, c);
- attroff(attr);
+ tv->my_attron(tv,attr);
+ tv->my_mvaddch(tv,2, tv->ccol, c);
+ tv->my_attroff(tv,attr);
if(tv->ins) {
// calculate maximum insert
for (i = 0; i < n; ++i) {
int x;
attr = 0;
if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
- || (p->b->core.flag & BAM_FSECONDARY)) attr |= A_UNDERLINE;
+ || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv);
if (tv->color_for == TV_COLOR_BASEQ) {
x = bam1_qual(p->b)[p->qpos]/10 + 1;
if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
+ attr |= tv->my_colorpair(tv,x);
} else if (tv->color_for == TV_COLOR_MAPQ) {
x = p->b->core.qual/10 + 1;
if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
+ attr |= tv->my_colorpair(tv,x);
} else if (tv->color_for == TV_COLOR_NUCL) {
x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5;
- attr |= COLOR_PAIR(x);
+ attr |= tv->my_colorpair(tv,x);
} else if(tv->color_for == TV_COLOR_COL) {
x = 0;
switch(bam_aux_getCSi(p->b, p->qpos)) {
default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break;
}
x+=5;
- attr |= COLOR_PAIR(x);
+ attr |= tv->my_colorpair(tv,x);
} else if(tv->color_for == TV_COLOR_COLQ) {
x = bam_aux_getCQi(p->b, p->qpos);
if(0 == x) x = bam1_qual(p->b)[p->qpos];
x = x/10 + 1;
if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
+ attr |= tv->my_colorpair(tv,x);
}
- attron(attr);
- mvaddch(row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c));
- attroff(attr);
+ tv->my_attron(tv,attr);
+ tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c));
+ tv->my_attroff(tv,attr);
}
}
c = j? '*' : rb;
if (c == '*') {
- attr = COLOR_PAIR(8);
- attron(attr);
- mvaddch(1, tv->ccol++, c);
- attroff(attr);
- } else mvaddch(1, tv->ccol++, c);
+ attr = tv->my_colorpair(tv,8);
+ tv->my_attron(tv,attr);
+ tv->my_mvaddch(tv,1, tv->ccol++, c);
+ tv->my_attroff(tv,attr);
+ } else tv->my_mvaddch(tv,1, tv->ccol++, c);
}
tv->last_pos = pos;
return 0;
}
-tview_t *tv_init(const char *fn, const char *fn_fa, char *samples)
-{
- tview_t *tv = (tview_t*)calloc(1, sizeof(tview_t));
- tv->is_dot = 1;
- tv->fp = bam_open(fn, "r");
- bgzf_set_cache_size(tv->fp, 8 * 1024 *1024);
- assert(tv->fp);
- tv->header = bam_header_read(tv->fp);
- tv->idx = bam_index_load(fn);
- if (tv->idx == 0) exit(1);
- tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
- if (fn_fa) tv->fai = fai_load(fn_fa);
- tv->bca = bcf_call_init(0.83, 13);
- tv->ins = 1;
- if ( samples )
- {
- if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text);
- void *iter = tv->header->dict;
- const char *key, *val;
- int n = 0;
- tv->rg_hash = kh_init(kh_rg);
- while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) )
- {
- if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) )
- {
- khiter_t k = kh_get(kh_rg, tv->rg_hash, key);
- if ( k != kh_end(tv->rg_hash) ) continue;
- int ret;
- k = kh_put(kh_rg, tv->rg_hash, key, &ret);
- kh_value(tv->rg_hash, k) = val;
- n++;
- }
- }
- if ( !n )
- {
- fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples);
- exit(-1);
- }
- }
-
- initscr();
- keypad(stdscr, TRUE);
- clear();
- noecho();
- cbreak();
- tv->mrow = 24; tv->mcol = 80;
- getmaxyx(stdscr, tv->mrow, tv->mcol);
- tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5);
- tv->whelp = newwin(29, 40, 5, 5);
- tv->color_for = TV_COLOR_MAPQ;
- start_color();
- init_pair(1, COLOR_BLUE, COLOR_BLACK);
- init_pair(2, COLOR_GREEN, COLOR_BLACK);
- init_pair(3, COLOR_YELLOW, COLOR_BLACK);
- init_pair(4, COLOR_WHITE, COLOR_BLACK);
- init_pair(5, COLOR_GREEN, COLOR_BLACK);
- init_pair(6, COLOR_CYAN, COLOR_BLACK);
- init_pair(7, COLOR_YELLOW, COLOR_BLACK);
- init_pair(8, COLOR_RED, COLOR_BLACK);
- init_pair(9, COLOR_BLUE, COLOR_BLACK);
- return tv;
-}
-
-void tv_destroy(tview_t *tv)
-{
- delwin(tv->wgoto); delwin(tv->whelp);
- endwin();
- bam_lplbuf_destroy(tv->lplbuf);
- bcf_call_destroy(tv->bca);
- bam_index_destroy(tv->idx);
- if (tv->fai) fai_destroy(tv->fai);
- free(tv->ref);
- bam_header_destroy(tv->header);
- bam_close(tv->fp);
- free(tv);
-}
int tv_fetch_func(const bam1_t *b, void *data)
{
return 0;
}
-int tv_draw_aln(tview_t *tv, int tid, int pos)
-{
+int base_draw_aln(tview_t *tv, int tid, int pos)
+ {
+ assert(tv!=NULL);
// reset
- clear();
+ tv->my_clear(tv);
tv->curr_tid = tid; tv->left_pos = pos;
tv->last_pos = tv->left_pos - 1;
tv->ccol = 0;
if (tv->fai) {
char *str;
if (tv->ref) free(tv->ref);
+ assert(tv->curr_tid>=0);
+
str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
+ assert(str!=NULL);
sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
free(str);
while (tv->ccol < tv->mcol) {
int pos = tv->last_pos + 1;
- if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);
- mvaddch(1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
+ if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1);
+ tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
++tv->last_pos;
}
return 0;
}
-static void tv_win_goto(tview_t *tv, int *tid, int *pos)
-{
- char str[256], *p;
- int i, l = 0;
- wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
- mvwprintw(tv->wgoto, 1, 2, "Goto: ");
- for (;;) {
- int c = wgetch(tv->wgoto);
- wrefresh(tv->wgoto);
- if (c == KEY_BACKSPACE || c == '\010' || c == '\177') {
- if(l > 0) --l;
- } else if (c == KEY_ENTER || c == '\012' || c == '\015') {
- int _tid = -1, _beg, _end;
- if (str[0] == '=') {
- _beg = strtol(str+1, &p, 10) - 1;
- if (_beg > 0) {
- *pos = _beg;
- return;
- }
- } else {
- bam_parse_region(tv->header, str, &_tid, &_beg, &_end);
- if (_tid >= 0) {
- *tid = _tid; *pos = _beg;
- return;
- }
- }
- } else if (isgraph(c)) {
- if (l < TV_MAX_GOTO) str[l++] = c;
- } else if (c == '\027') l = 0;
- else if (c == '\033') return;
- str[l] = '\0';
- for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' ');
- mvwprintw(tv->wgoto, 1, 8, "%s", str);
- }
-}
-static void tv_win_help(tview_t *tv) {
- int r = 1;
- WINDOW *win = tv->whelp;
- wborder(win, '|', '|', '-', '-', '+', '+', '+', '+');
- mvwprintw(win, r++, 2, " -=- Help -=- ");
- r++;
- mvwprintw(win, r++, 2, "? This window");
- mvwprintw(win, r++, 2, "Arrows Small scroll movement");
- mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement");
- mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement");
- mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left");
- mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right");
- mvwprintw(win, r++, 2, "space Scroll one screen");
- mvwprintw(win, r++, 2, "backspace Scroll back one screen");
- mvwprintw(win, r++, 2, "g Go to specific location");
- mvwprintw(win, r++, 2, "m Color for mapping qual");
- mvwprintw(win, r++, 2, "n Color for nucleotide");
- mvwprintw(win, r++, 2, "b Color for base quality");
- mvwprintw(win, r++, 2, "c Color for cs color");
- mvwprintw(win, r++, 2, "z Color for cs qual");
- mvwprintw(win, r++, 2, ". Toggle on/off dot view");
- mvwprintw(win, r++, 2, "s Toggle on/off ref skip");
- mvwprintw(win, r++, 2, "r Toggle on/off rd name");
- mvwprintw(win, r++, 2, "N Turn on nt view");
- mvwprintw(win, r++, 2, "C Turn on cs view");
- mvwprintw(win, r++, 2, "i Toggle on/off ins");
- mvwprintw(win, r++, 2, "q Exit");
- r++;
- mvwprintw(win, r++, 2, "Underline: Secondary or orphan");
- mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19");
- mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30");
- wrefresh(win);
- wgetch(win);
-}
-void tv_loop(tview_t *tv)
-{
- int tid, pos;
- tid = tv->curr_tid; pos = tv->left_pos;
- while (1) {
- int c = getch();
- switch (c) {
- case '?': tv_win_help(tv); break;
- case '\033':
- case 'q': goto end_loop;
- case '/':
- case 'g': tv_win_goto(tv, &tid, &pos); break;
- case 'm': tv->color_for = TV_COLOR_MAPQ; break;
- case 'b': tv->color_for = TV_COLOR_BASEQ; break;
- case 'n': tv->color_for = TV_COLOR_NUCL; break;
- case 'c': tv->color_for = TV_COLOR_COL; break;
- case 'z': tv->color_for = TV_COLOR_COLQ; break;
- case 's': tv->no_skip = !tv->no_skip; break;
- case 'r': tv->show_name = !tv->show_name; break;
- case KEY_LEFT:
- case 'h': --pos; break;
- case KEY_RIGHT:
- case 'l': ++pos; break;
- case KEY_SLEFT:
- case 'H': pos -= 20; break;
- case KEY_SRIGHT:
- case 'L': pos += 20; break;
- case '.': tv->is_dot = !tv->is_dot; break;
- case 'N': tv->base_for = TV_BASE_NUCL; break;
- case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break;
- case 'i': tv->ins = !tv->ins; break;
- case '\010': pos -= 1000; break;
- case '\014': pos += 1000; break;
- case ' ': pos += tv->mcol; break;
- case KEY_UP:
- case 'j': --tv->row_shift; break;
- case KEY_DOWN:
- case 'k': ++tv->row_shift; break;
- case KEY_BACKSPACE:
- case '\177': pos -= tv->mcol; break;
- case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;
- default: continue;
- }
- if (pos < 0) pos = 0;
- if (tv->row_shift < 0) tv->row_shift = 0;
- tv_draw_aln(tv, tid, pos);
- }
-end_loop:
- return;
-}
-void error(const char *format, ...)
+static void error(const char *format, ...)
{
if ( !format )
{
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bamtk tview [options] <aln.bam> [ref.fasta]\n");
fprintf(stderr, "Options:\n");
+ fprintf(stderr, " -d display output as (H)tml or (C)urses or (T)ext \n");
fprintf(stderr, " -p chr:pos go directly to this position\n");
- fprintf(stderr, " -s STR display only reads from this sample or grou\n");
+ fprintf(stderr, " -s STR display only reads from this sample or group\n");
fprintf(stderr, "\n\n");
}
else
exit(-1);
}
+/* Output back ends selectable with the -d option of tview. */
+enum display_mode {display_ncurses,display_html,display_text};
+/* Constructors provided by the individual back ends. bam_tview_main treats a
+   NULL result as failure to create the view. */
+extern tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples);
+extern tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples);
+extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples);
+/*
+ * Entry point of the tview command.
+ *
+ * Options: -s restricts the display to one sample or read group, -p gives a
+ * start position, -d picks the back end from its first letter (H/h html,
+ * T/t text, C/c curses; any other value falls back to curses).
+ * argv[optind] is the BAM file; argv[optind+1], when present, is the
+ * reference FASTA.
+ *
+ * Returns EXIT_SUCCESS after the view loop ends, EXIT_FAILURE when the
+ * selected back end could not create a view.
+ */
int bam_tview_main(int argc, char *argv[])
-{
- tview_t *tv;
+ {
+ int view_mode=display_ncurses;
+ tview_t* tv=NULL;
char *samples=NULL, *position=NULL;
int c;
- while ((c = getopt(argc, argv, "s:p:")) >= 0) {
+ while ((c = getopt(argc, argv, "s:p:d:")) >= 0) {
switch (c) {
case 's': samples=optarg; break;
case 'p': position=optarg; break;
+ case 'd':
+ {
+ switch(optarg[0])
+ {
+ case 'H': case 'h': view_mode=display_html;break;
+ case 'T': case 't': view_mode=display_text;break;
+ case 'C': case 'c': view_mode=display_ncurses;break;
+ default: view_mode=display_ncurses;break;
+ }
+ break;
+ }
default: error(NULL);
}
}
if (argc==optind) error(NULL);
- tv = tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
- if ( position )
- {
- int _tid = -1, _beg, _end;
- bam_parse_region(tv->header, position, &_tid, &_beg, &_end);
- if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; }
- }
- tv_draw_aln(tv, tv->curr_tid, tv->left_pos);
- tv_loop(tv);
- tv_destroy(tv);
- return 0;
-}
-#else // #ifdef _HAVE_CURSES
-#include <stdio.h>
-#warning "No curses library is available; tview is disabled."
-int bam_tview_main(int argc, char *argv[])
-{
- fprintf(stderr, "[bam_tview_main] The ncurses library is unavailable; tview is not compiled.\n");
- return 1;
-}
-#endif // #ifdef _HAVE_CURSES
+
+ /* build the view with the back end chosen by -d */
+ switch(view_mode)
+ {
+ case display_ncurses:
+ {
+ tv = curses_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
+ break;
+ }
+ case display_text:
+ {
+ tv = text_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
+ break;
+ }
+ case display_html:
+ {
+ tv = html_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples);
+ break;
+ }
+ }
+ if(tv==NULL)
+ {
+ error("cannot create view");
+ return EXIT_FAILURE;
+ }
+
+ /* -p: start at the requested region if it names a known target */
+ if ( position )
+ {
+ int _tid = -1, _beg, _end;
+ bam_parse_region(tv->header, position, &_tid, &_beg, &_end);
+ if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; }
+ }
+ tv->my_drawaln(tv, tv->curr_tid, tv->left_pos);
+ tv->my_loop(tv);
+ tv->my_destroy(tv);
+
+ return EXIT_SUCCESS;
+ }
--- /dev/null
+#ifndef BAM_TVIEW_H
+#define BAM_TVIEW_H
+
+#include <ctype.h>
+#include <assert.h>
+#include <string.h>
+#include <math.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include "bam.h"
+#include "faidx.h"
+#include "bam2bcf.h"
+#include "sam_header.h"
+#include "khash.h"
+
+KHASH_MAP_INIT_STR(kh_rg, const char *)
+
+/*
+ * State shared by all tview back ends, followed by the callbacks through
+ * which the shared drawing code talks to a concrete back end.
+ */
+typedef struct AbstractTview {
+ int mrow, mcol; /* display size in rows and columns */
+
+ bam_index_t *idx; /* index of the displayed BAM file */
+ bam_lplbuf_t *lplbuf; /* pileup buffer; base_tv_init registers tv_pl_func on it */
+ bam_header_t *header; /* header of the displayed BAM file */
+ bamFile fp; /* handle of the displayed BAM file */
+ int curr_tid, left_pos; /* target id and leftmost position of the current view */
+ faidx_t *fai; /* reference FASTA index; NULL when no reference was given */
+ bcf_callaux_t *bca; /* created by bcf_call_init() in base_tv_init */
+
+ /* ccol: column currently being drawn; last_pos: last position drawn;
+    l_ref: length of `ref`; color_for holds a TV_COLOR_* value and base_for a
+    TV_BASE_* value; the remaining fields are display settings */
+ int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name;
+ char *ref; /* reference sequence fetched for the current window, or NULL */
+ khash_t(kh_rg) *rg_hash; /* read groups selected by the sample filter, if any */
+ /* callbacks */
+ void (*my_destroy)(struct AbstractTview* );
+ void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
+ void (*my_mvaddch)(struct AbstractTview*,int,int,int);
+ void (*my_attron)(struct AbstractTview*,int);
+ void (*my_attroff)(struct AbstractTview*,int);
+ void (*my_clear)(struct AbstractTview*);
+ int (*my_colorpair)(struct AbstractTview*,int);
+ int (*my_drawaln)(struct AbstractTview*,int,int);
+ int (*my_loop)(struct AbstractTview*);
+ int (*my_underline)(struct AbstractTview*);
+} tview_t;
+
+
+char bam_aux_getCEi(bam1_t *b, int i);
+char bam_aux_getCSi(bam1_t *b, int i);
+char bam_aux_getCQi(bam1_t *b, int i);
+
+#define TV_MIN_ALNROW 2
+#define TV_MAX_GOTO 40
+#define TV_LOW_MAPQ 10
+
+#define TV_COLOR_MAPQ 0
+#define TV_COLOR_BASEQ 1
+#define TV_COLOR_NUCL 2
+#define TV_COLOR_COL 3
+#define TV_COLOR_COLQ 4
+
+#define TV_BASE_NUCL 0
+#define TV_BASE_COLOR_SPACE 1
+
+int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
+int base_tv_init(tview_t*,const char *fn, const char *fn_fa, const char *samples);
+void base_tv_destroy(tview_t*);
+int base_draw_aln(tview_t *tv, int tid, int pos);
+
+/* A character together with its display attributes.
+   NOTE(review): presumably one screen cell for the non-curses back ends;
+   no use of it is visible here, so confirm against those files. */
+typedef struct Tixel
+ {
+ int ch;
+ int attributes;
+ }tixel_t;
+
+#endif
+
--- /dev/null
+#undef _HAVE_CURSES
+
+#if _CURSES_LIB == 0
+#elif _CURSES_LIB == 1
+#include <curses.h>
+#ifndef NCURSES_VERSION
+#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled"
+#else
+#define _HAVE_CURSES
+#endif
+#elif _CURSES_LIB == 2
+#include <xcurses.h>
+#define _HAVE_CURSES
+#else
+#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled"
+#endif
+
+
+#include "bam_tview.h"
+
+#ifdef _HAVE_CURSES
+
+
+
+/* Curses back end. `view` must stay the first member: the code casts between
+   tview_t* and curses_tview_t*. */
+typedef struct CursesTview {
+ tview_t view;
+ WINDOW *wgoto, *whelp; /* pop-up windows for the goto prompt and the help screen */
+ } curses_tview_t;
+
+
+
+
+#define FROM_TV(ptr) ((curses_tview_t*)ptr)
+
+/* Destroy callback of the curses back end: removes the pop-up windows, leaves
+   curses mode, releases the shared state and finally the view itself. */
+static void curses_destroy(tview_t* base)
+    {
+    curses_tview_t* self=FROM_TV(base);
+
+    delwin(self->wgoto);
+    delwin(self->whelp);
+    endwin();
+
+    base_tv_destroy(base);
+
+    free(self);
+    }
+
+/*
+ void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
+ void (*my_mvaddch)(struct AbstractTview*,int,int,int);
+ void (*my_attron)(struct AbstractTview*,int);
+ void (*my_attroff)(struct AbstractTview*,int);
+ void (*my_clear)(struct AbstractTview*);
+ int (*my_colorpair)(struct AbstractTview*,int);
+*/
+
+/*
+ * printf-style output at row y, column x of the curses screen.
+ * The text is formatted into a temporary buffer of mcol+2 bytes, so longer
+ * output is truncated. Exits the process if the buffer cannot be allocated.
+ */
+static void curses_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
+    {
+    unsigned int size=tv->mcol+2;
+    char* str=malloc(size);
+    va_list argptr;
+    if(str==0) exit(EXIT_FAILURE);
+    va_start(argptr, fmt);
+    vsnprintf(str,size, fmt, argptr);
+    va_end(argptr);
+    /* str is already formatted: print it verbatim so that a '%' in the
+       output is not interpreted as a conversion a second time */
+    mvprintw(y,x,"%s",str);
+    free(str);
+    }
+
+/* The callbacks below forward to the curses call of the same name; the view
+   argument is unused. */
+static void curses_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
+ {
+ mvaddch(y,x,ch);
+ }
+
+static void curses_attron(struct AbstractTview* tv,int flag)
+ {
+ attron(flag);
+ }
+static void curses_attroff(struct AbstractTview* tv,int flag)
+ {
+ attroff(flag);
+ }
+static void curses_clear(struct AbstractTview* tv)
+ {
+ clear();
+ }
+
+/* Returns the curses attribute for colour pair number `flag`. */
+static int curses_colorpair(struct AbstractTview* tv,int flag)
+ {
+ return COLOR_PAIR(flag);
+ }
+
+/* Draw callback: the curses back end uses the shared drawing routine as is. */
+static int curses_drawaln(struct AbstractTview* tv, int tid, int pos)
+ {
+ return base_draw_aln(tv, tid, pos);
+ }
+
+
+
+/*
+ * Interactive "Goto:" prompt.
+ *
+ * Reads keys from the goto window until Enter yields a usable location or
+ * Escape cancels. "=N" sets *pos to N-1 and keeps the current target; any
+ * other input is handed to bam_parse_region() and, when it names a known
+ * target, sets both *tid and *pos. Ctrl-W clears the input. On cancel *tid
+ * and *pos are left untouched.
+ */
+static void tv_win_goto(curses_tview_t *tv, int *tid, int *pos)
+    {
+    char str[256], *p;
+    int i, l = 0;
+    tview_t *base=(tview_t*)tv;
+    /* start with an empty string: Enter may be the very first key pressed */
+    str[0] = '\0';
+    wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
+    mvwprintw(tv->wgoto, 1, 2, "Goto: ");
+    for (;;) {
+        int c = wgetch(tv->wgoto);
+        wrefresh(tv->wgoto);
+        if (c == KEY_BACKSPACE || c == '\010' || c == '\177') {
+            if(l > 0) --l;
+        } else if (c == KEY_ENTER || c == '\012' || c == '\015') {
+            int _tid = -1, _beg, _end;
+            if (str[0] == '=') {
+                _beg = strtol(str+1, &p, 10) - 1;
+                if (_beg > 0) {
+                    *pos = _beg;
+                    return;
+                }
+            } else {
+                bam_parse_region(base->header, str, &_tid, &_beg, &_end);
+                if (_tid >= 0) {
+                    *tid = _tid; *pos = _beg;
+                    return;
+                }
+            }
+        } else if (c >= 0 && c <= 255 && isgraph(c)) {
+            /* range check first: curses key codes lie outside the domain of isgraph() */
+            if (l < TV_MAX_GOTO) str[l++] = c;
+        } else if (c == '\027') l = 0;
+        else if (c == '\033') return;
+        str[l] = '\0';
+        for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' ');
+        mvwprintw(tv->wgoto, 1, 8, "%s", str);
+    }
+}
+
+
+
+
+/* Draw the key-binding summary in the help window and wait for one key
+ * press before returning. */
+static void tv_win_help(curses_tview_t *tv) {
+    int r = 1;
+    /* The former local `base` was initialised from itself and never used;
+     * it has been removed. */
+    WINDOW *win = tv->whelp;
+    wborder(win, '|', '|', '-', '-', '+', '+', '+', '+');
+    mvwprintw(win, r++, 2, " -=- Help -=- ");
+    r++;
+    mvwprintw(win, r++, 2, "? This window");
+    mvwprintw(win, r++, 2, "Arrows Small scroll movement");
+    mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement");
+    mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement");
+    mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left");
+    mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right");
+    mvwprintw(win, r++, 2, "space Scroll one screen");
+    mvwprintw(win, r++, 2, "backspace Scroll back one screen");
+    mvwprintw(win, r++, 2, "g Go to specific location");
+    mvwprintw(win, r++, 2, "m Color for mapping qual");
+    mvwprintw(win, r++, 2, "n Color for nucleotide");
+    mvwprintw(win, r++, 2, "b Color for base quality");
+    mvwprintw(win, r++, 2, "c Color for cs color");
+    mvwprintw(win, r++, 2, "z Color for cs qual");
+    mvwprintw(win, r++, 2, ". Toggle on/off dot view");
+    mvwprintw(win, r++, 2, "s Toggle on/off ref skip");
+    mvwprintw(win, r++, 2, "r Toggle on/off rd name");
+    mvwprintw(win, r++, 2, "N Turn on nt view");
+    mvwprintw(win, r++, 2, "C Turn on cs view");
+    mvwprintw(win, r++, 2, "i Toggle on/off ins");
+    mvwprintw(win, r++, 2, "q Exit");
+    r++;
+    mvwprintw(win, r++, 2, "Underline: Secondary or orphan");
+    mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19");
+    mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30");
+    wrefresh(win);
+    wgetch(win);
+}
+
+/* Callback: attribute value used for underlined text (A_UNDERLINE). */
+static int curses_underline(tview_t* tv)
+{
+    (void)tv;
+    return A_UNDERLINE;
+}
+
+/*
+ * Interactive key loop of the curses viewer.
+ *
+ * Reads keys with getch(), updates the view settings or the current
+ * position, clamps pos and row_shift at zero and redraws through
+ * tv->my_drawaln().  Keys without a binding are ignored without a redraw.
+ * Returns 0 when 'q' or ESC is pressed.
+ */
+static int curses_loop(tview_t* tv)
+{
+    curses_tview_t *self = (curses_tview_t *)tv;
+    int tid = tv->curr_tid;
+    int pos = tv->left_pos;
+
+    for (;;) {
+        int key = getch();
+        switch (key) {
+        case '?': tv_win_help(self); break;
+        case '\033':
+        case 'q': return 0;
+        case '/':
+        case 'g': tv_win_goto(self, &tid, &pos); break;
+        case 'm': tv->color_for = TV_COLOR_MAPQ; break;
+        case 'b': tv->color_for = TV_COLOR_BASEQ; break;
+        case 'n': tv->color_for = TV_COLOR_NUCL; break;
+        case 'c': tv->color_for = TV_COLOR_COL; break;
+        case 'z': tv->color_for = TV_COLOR_COLQ; break;
+        case 's': tv->no_skip = !tv->no_skip; break;
+        case 'r': tv->show_name = !tv->show_name; break;
+        case KEY_LEFT:
+        case 'h': --pos; break;
+        case KEY_RIGHT:
+        case 'l': ++pos; break;
+        case KEY_SLEFT:
+        case 'H': pos -= 20; break;
+        case KEY_SRIGHT:
+        case 'L': pos += 20; break;
+        case '.': tv->is_dot = !tv->is_dot; break;
+        case 'N': tv->base_for = TV_BASE_NUCL; break;
+        case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break;
+        case 'i': tv->ins = !tv->ins; break;
+        case '\010': pos -= 1000; break;
+        case '\014': pos += 1000; break;
+        case ' ': pos += tv->mcol; break;
+        case KEY_UP:
+        case 'j': --tv->row_shift; break;
+        case KEY_DOWN:
+        case 'k': ++tv->row_shift; break;
+        case KEY_BACKSPACE:
+        case '\177': pos -= tv->mcol; break;
+        case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;
+        default: continue; /* unbound key: wait for the next one */
+        }
+        if (pos < 0) pos = 0;
+        if (tv->row_shift < 0) tv->row_shift = 0;
+        tv->my_drawaln(tv, tid, pos);
+    }
+}
+
+
+
+
+/*
+ * Create a curses-backed viewer.
+ *
+ * Allocates the object, runs base_tv_init() on its base part, installs the
+ * curses_* callbacks, switches the terminal into curses mode, creates the
+ * goto and help windows and defines colour pairs 1..9 on a black
+ * background.  Returns the viewer as a tview_t*, or 0 if allocation fails.
+ */
+tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples)
+{
+    /* foreground colour of pair i+1 */
+    static const int foreground[9] = {
+        COLOR_BLUE, COLOR_GREEN, COLOR_YELLOW, COLOR_WHITE, COLOR_GREEN,
+        COLOR_CYAN, COLOR_YELLOW, COLOR_RED, COLOR_BLUE
+    };
+    int i;
+    curses_tview_t *self = (curses_tview_t*)calloc(1, sizeof(curses_tview_t));
+    tview_t *base = (tview_t*)self;
+
+    if (self == 0) {
+        fprintf(stderr,"Calloc failed\n");
+        return 0;
+    }
+
+    base_tv_init(base,fn,fn_fa,samples);
+
+    /* initialize callbacks */
+    base->my_destroy = curses_destroy;
+    base->my_mvprintw = curses_mvprintw;
+    base->my_mvaddch = curses_mvaddch;
+    base->my_attron = curses_attron;
+    base->my_attroff = curses_attroff;
+    base->my_clear = curses_clear;
+    base->my_colorpair = curses_colorpair;
+    base->my_drawaln = curses_drawaln;
+    base->my_loop = curses_loop;
+    base->my_underline = curses_underline;
+
+    initscr();
+    keypad(stdscr, TRUE);
+    clear();
+    noecho();
+    cbreak();
+
+    getmaxyx(stdscr, base->mrow, base->mcol);
+    self->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5);
+    self->whelp = newwin(29, 40, 5, 5);
+
+    start_color();
+    for (i = 0; i < 9; ++i)
+        init_pair(i + 1, foreground[i], COLOR_BLACK);
+    return base;
+}
+
+
+#else // #ifdef _HAVE_CURSES
+#include <stdio.h>
+#warning "No curses library is available; tview with curses is disabled."
+
+extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples);
+
+/* Built without curses: hand the request over to text_tv_init(). */
+tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples)
+{
+    tview_t *fallback = text_tv_init(fn, fn_fa, samples);
+    return fallback;
+}
+#endif // #ifdef _HAVE_CURSES
+
+
--- /dev/null
+#include <unistd.h>
+#include "bam_tview.h"
+
+#define UNDERLINE_FLAG 10
+
+/* Viewer that renders into an in-memory character grid and then writes it
+ * out as HTML or plain text.  `view` is the first member because the code
+ * casts between tview_t* and html_tview_t*. */
+typedef struct HtmlTview {
+    tview_t view;
+    int row_count;     /* number of rows currently held in `screen` */
+    tixel_t **screen;  /* row_count rows of cells */
+    FILE *out;         /* stream the rendered screen is written to */
+    int attributes;    /* attribute bits (color...) given to cells written next */
+} html_tview_t;
+
+/* Cast a generic viewer pointer to the HTML/text implementation.  The
+ * argument is parenthesised so that an expression such as `p + 1` is cast
+ * as a whole. */
+#define FROM_TV(ptr) ((html_tview_t*)(ptr))
+
+/* Free the screen rows and the row table (if any), pass the base part to
+ * base_tv_destroy() and free the viewer object. */
+static void html_destroy(tview_t* base)
+{
+    html_tview_t *self = (html_tview_t*)base;
+
+    if (self->screen != NULL) {
+        int row = 0;
+        while (row < self->row_count) {
+            free(self->screen[row]);
+            ++row;
+        }
+        free(self->screen);
+    }
+    base_tv_destroy(base);
+    free(self);
+}
+
+/*
+ void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...);
+ void (*my_)(struct AbstractTview*,int,int,int);
+ void (*my_attron)(struct AbstractTview*,int);
+ void (*my_attroff)(struct AbstractTview*,int);
+ void (*my_clear)(struct AbstractTview*);
+ int (*my_colorpair)(struct AbstractTview*,int);
+*/
+
+/*
+ * printf-style output into the in-memory screen, starting at (y, x).
+ *
+ * The message is formatted into a temporary buffer of tv->mcol+2 bytes
+ * (longer output is truncated) and written one character at a time through
+ * tv->my_mvaddch().  Exits the process if the buffer cannot be allocated.
+ */
+static void html_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...)
+{
+    int i,nchars=0;
+    unsigned int size=tv->mcol+2;
+    char* str=malloc(size);
+    va_list argptr;
+    if(str==0) exit(EXIT_FAILURE);
+    va_start(argptr, fmt);
+    nchars=vsnprintf(str,size, fmt, argptr);
+    va_end(argptr);
+
+    /* vsnprintf() returns the length the full output would have had, which
+     * can exceed what was stored.  Clamp it so the loop below never reads
+     * past the end of str. */
+    if(nchars<0) nchars=0;
+    else if((unsigned int)nchars>=size) nchars=(int)size-1;
+
+    for(i=0;i< nchars;++i)
+    {
+        tv->my_mvaddch(tv,y,x+i,str[i]);
+    }
+    free(str);
+}
+
+/*
+ * Store character `ch` with the current attributes at (y, x) of the
+ * in-memory screen.
+ *
+ * Rows are appended on demand until row y exists; each new row has
+ * tv->mcol cells initialised to a space with no attributes.  Coordinates
+ * outside the screen (negative, or x >= tv->mcol) are ignored.  Exits the
+ * process on allocation failure.
+ */
+static void html_mvaddch(struct AbstractTview* tv,int y,int x,int ch)
+{
+    tixel_t* row=NULL;
+    html_tview_t* ptr=FROM_TV(tv);
+    if( x < 0 || y < 0 || x >= tv->mcol ) return; //out of screen
+    while(ptr->row_count<=y)
+    {
+        int col;
+        tixel_t** grown;
+        row=(tixel_t*)calloc(tv->mcol,sizeof(tixel_t));
+        if(row==0) exit(EXIT_FAILURE);
+        for(col=0;col<tv->mcol;++col) {row[col].ch=' ';row[col].attributes=0;}
+        /* keep the old table pointer until realloc is known to have succeeded */
+        grown=(tixel_t**)realloc(ptr->screen,sizeof(tixel_t*)*(ptr->row_count+1));
+        if(grown==0) exit(EXIT_FAILURE);
+        ptr->screen=grown;
+        ptr->screen[ptr->row_count++]=row;
+    }
+    row=ptr->screen[y];
+    row[x].ch=ch;
+    row[x].attributes=ptr->attributes;
+}
+
+/* Callback: set the bits of `flag` in the current attribute mask. */
+static void html_attron(struct AbstractTview* tv,int flag)
+{
+    html_tview_t *self = FROM_TV(tv);
+    self->attributes = self->attributes | flag;
+}
+
+/* Callback: clear the bits of `flag` in the current attribute mask. */
+static void html_attroff(struct AbstractTview* tv,int flag)
+{
+    html_tview_t *self = FROM_TV(tv);
+    self->attributes = self->attributes & ~(flag);
+}
+
+/* Callback: drop the whole in-memory screen and reset the row count and
+ * the current attribute mask to zero. */
+static void html_clear(struct AbstractTview* tv)
+{
+    html_tview_t *self = FROM_TV(tv);
+
+    if (self->screen != NULL) {
+        int row = 0;
+        while (row < self->row_count) {
+            free(self->screen[row]);
+            ++row;
+        }
+        free(self->screen);
+        self->screen = NULL;
+    }
+    self->row_count = 0;
+    self->attributes = 0;
+}
+
+/* Callback: attribute value for colour pair `flag`, i.e. the single bit
+ * at position `flag`. */
+static int html_colorpair(struct AbstractTview* tv,int flag)
+{
+    (void)tv;
+    return (1 << (flag));
+}
+
+/*
+ * Render the alignment at (tid, pos) as a complete HTML document on
+ * ptr->out.
+ *
+ * The in-memory screen is cleared and refilled by base_draw_aln().  The
+ * output is a head section with an inline style sheet (classes tviewcN and
+ * tviewcuN for N = 0..9) followed by a <pre> block.  Each run of cells that
+ * share the same attributes is wrapped in one <span>; its class comes from
+ * the lowest attribute bit set, with a "u" infix when the underline bit is
+ * set.  Always returns 0.
+ */
+static int html_drawaln(struct AbstractTview* tv, int tid, int pos)
+{
+    int y,x;
+    html_tview_t* ptr=FROM_TV(tv);
+    html_clear(tv);
+    base_draw_aln(tv, tid, pos);
+    fputs("<html><head>",ptr->out);
+    fprintf(ptr->out,"<title>%s:%d</title>",
+        tv->header->target_name[tid],
+        pos+1
+        );
+    //style
+
+    fputs("<style type='text/css'>\n",ptr->out);
+    fputs(".tviewbody { margin:5px; background-color:white;text-align:center;}\n",ptr->out);
+    fputs(".tviewtitle {text-align:center;}\n",ptr->out);
+    fputs(".tviewpre { margin:5px; background-color:white;}\n",ptr->out);
+    #define CSS(id,col) fprintf(ptr->out,".tviewc%d {color:%s;}\n.tviewcu%d {color:%s;text-decoration:underline;}\n",id,col,id,col);
+    CSS(0, "black");
+    CSS(1, "blue");
+    CSS(2, "green");
+    CSS(3, "yellow");
+    CSS(4, "black");
+    CSS(5, "green");
+    CSS(6, "cyan");
+    CSS(7, "yellow");
+    CSS(8, "red");
+    CSS(9, "blue");
+    #undef CSS
+    fputs("</style>",ptr->out);
+
+    fputs("</head><body>",ptr->out);
+
+    fprintf(ptr->out,"<div class='tviewbody'><div class='tviewtitle'>%s:%d</div>",
+        tv->header->target_name[tid],
+        pos+1
+        );
+
+    fputs("<pre class='tviewpre'>",ptr->out);
+    for(y=0;y< ptr->row_count;++y)
+    {
+        for(x=0;x< tv->mcol;++x)
+        {
+            /* open a span at the start of a row and whenever the attributes change */
+            if(x== 0 || ptr->screen[y][x].attributes != ptr->screen[y][x-1].attributes)
+            {
+                int css=0;
+                fprintf(ptr->out,"<span");
+                while(css<32)
+                {
+                    /* unsigned shift: 1 << 31 is undefined for a signed int,
+                     * and css reaches 31 for every cell without attributes */
+                    if(( (ptr->screen[y][x].attributes) & (1u << (css)))!=0)
+                    {
+                        fprintf(ptr->out," class='tviewc%s%d'",
+                            (( (ptr->screen[y][x].attributes) & (1u << (UNDERLINE_FLAG)) )!=0?"u":""),
+                            css);
+                        break;
+                    }
+                    ++css;
+                }
+
+                fputs(">",ptr->out);
+            }
+
+            int ch=ptr->screen[y][x].ch;
+            /* markup characters must be written as entities, otherwise
+             * they are parsed as HTML instead of being displayed */
+            switch(ch)
+            {
+                case '<': fputs("&lt;",ptr->out);break;
+                case '>': fputs("&gt;",ptr->out);break;
+                case '&': fputs("&amp;",ptr->out);break;
+                default: fputc(ch,ptr->out); break;
+            }
+
+            /* close the span at the end of a row and before the attributes change */
+            if(x+1 == tv->mcol || ptr->screen[y][x].attributes!=ptr->screen[y][x+1].attributes)
+            {
+                fputs("</span>",ptr->out);
+            }
+        }
+        if(y+1 < ptr->row_count) fputs("<br/>",ptr->out);
+    }
+    fputs("</pre></div></body></html>",ptr->out);
+    return 0;
+}
+
+
+#define ANSI_COLOR_RED "\x1b[31m"
+#define ANSI_COLOR_GREEN "\x1b[32m"
+#define ANSI_COLOR_YELLOW "\x1b[33m"
+#define ANSI_COLOR_BLUE "\x1b[34m"
+#define ANSI_COLOR_MAGENTA "\x1b[35m"
+#define ANSI_COLOR_CYAN "\x1b[36m"
+#define ANSI_COLOR_BLACK "\x1b[0m"
+#define ANSI_COLOR_RESET ANSI_COLOR_BLACK
+
+#define ANSI_UNDERLINE_SET "\033[4m"
+#define ANSI_UNDERLINE_UNSET "\033[0m"
+
+/*
+ * Render the alignment at (tid, pos) as plain text on ptr->out, one output
+ * line per screen row.
+ *
+ * The in-memory screen is cleared and refilled by base_draw_aln().  When
+ * the output stream is a terminal, each character is preceded by the ANSI
+ * colour selected by its lowest attribute bit (and by the underline
+ * sequence if the underline bit is set) and followed by a reset.
+ * Always returns 0.
+ */
+static int text_drawaln(struct AbstractTview* tv, int tid, int pos)
+{
+    int y,x;
+    html_tview_t* ptr=FROM_TV(tv);
+    html_clear(tv);
+    base_draw_aln(tv, tid, pos);
+    int is_term= isatty(fileno(ptr->out));
+
+    for(y=0;y< ptr->row_count;++y)
+    {
+        for(x=0;x< tv->mcol;++x)
+        {
+            if(is_term)
+            {
+                int css=0;
+                while(css<32)
+                {
+                    /* unsigned shift: 1 << 31 is undefined for a signed int,
+                     * and css reaches 31 for every cell without attributes */
+                    if(( (ptr->screen[y][x].attributes) & (1u << (css)))!=0)
+                    {
+                        break;
+                    }
+                    ++css;
+                }
+                switch(css)
+                {
+                    //CSS(0, "black");
+                    case 1: fputs(ANSI_COLOR_BLUE,ptr->out); break;
+                    case 2: fputs(ANSI_COLOR_GREEN,ptr->out); break;
+                    case 3: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
+                    //CSS(4, "black");
+                    case 5: fputs(ANSI_COLOR_GREEN,ptr->out); break;
+                    case 6: fputs(ANSI_COLOR_CYAN,ptr->out); break;
+                    case 7: fputs(ANSI_COLOR_YELLOW,ptr->out); break;
+                    case 8: fputs(ANSI_COLOR_RED,ptr->out); break;
+                    case 9: fputs(ANSI_COLOR_BLUE,ptr->out); break;
+                    default:break;
+                }
+                if(( (ptr->screen[y][x].attributes) & (1u << (UNDERLINE_FLAG)))!=0)
+                {
+                    fputs(ANSI_UNDERLINE_SET,ptr->out);
+                }
+            }
+
+            int ch=ptr->screen[y][x].ch;
+
+            fputc(ch,ptr->out);
+            if(is_term)
+            {
+                fputs(ANSI_COLOR_RESET,ptr->out);
+                if(( (ptr->screen[y][x].attributes) & (1u << (UNDERLINE_FLAG)))!=0)
+                {
+                    fputs(ANSI_UNDERLINE_UNSET,ptr->out);
+                }
+            }
+        }
+        fputc('\n',ptr->out);
+    }
+    return 0;
+}
+
+
+/* Callback: this backend has no interactive loop; it does nothing and
+ * reports success. */
+static int html_loop(tview_t* tv)
+{
+    (void)tv;
+    return 0;
+}
+
+/* Callback: attribute value used for underlined text, i.e. the bit at
+ * position UNDERLINE_FLAG. */
+static int html_underline(tview_t* tv)
+{
+    (void)tv;
+    return (1 << UNDERLINE_FLAG);
+}
+
+/*
+static void init_pair(html_tview_t *tv,int id_ge_1, const char* pen, const char* paper)
+ {
+
+ }
+*/
+
+/*
+ * Create a viewer that renders to HTML on stdout.
+ *
+ * Allocates the object, runs base_tv_init() on its base part and installs
+ * the html_* callbacks.  If the COLUMNS environment variable is set, the
+ * screen width is taken from it (values below 10 fall back to 80); the row
+ * limit is set to 99999.  Returns the viewer as a tview_t*, or 0 if
+ * allocation fails.
+ */
+tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples)
+{
+    char *columns = getenv("COLUMNS");
+    html_tview_t *self = (html_tview_t*)calloc(1, sizeof(html_tview_t));
+    tview_t *base = (tview_t*)self;
+
+    if (self == 0) {
+        fprintf(stderr,"Calloc failed\n");
+        return 0;
+    }
+    self->row_count = 0;
+    self->screen = NULL;
+    self->out = stdout;
+    self->attributes = 0;
+    base_tv_init(base,fn,fn_fa,samples);
+
+    /* initialize callbacks */
+    base->my_destroy = html_destroy;
+    base->my_mvprintw = html_mvprintw;
+    base->my_mvaddch = html_mvaddch;
+    base->my_attron = html_attron;
+    base->my_attroff = html_attroff;
+    base->my_clear = html_clear;
+    base->my_colorpair = html_colorpair;
+    base->my_drawaln = html_drawaln;
+    base->my_loop = html_loop;
+    base->my_underline = html_underline;
+
+    if (columns != 0) {
+        base->mcol = atoi(columns);
+        if (base->mcol < 10)
+            base->mcol = 80;
+    }
+    base->mrow = 99999;
+
+    return base;
+}
+
+
+/*
+ * Create a viewer that renders plain text: an HTML viewer whose draw
+ * callback is replaced by text_drawaln().
+ * Returns 0 if html_tv_init() fails.
+ */
+tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples)
+{
+    tview_t* tv=html_tv_init(fn,fn_fa,samples);
+    /* html_tv_init() returns 0 when allocation fails; do not dereference it */
+    if(tv==0) return 0;
+    tv->my_drawaln=text_drawaln;
+    return tv;
+}
+
extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt);
extern int bcf_pair_call(const bcf1_t *b);
extern int bcf_min_diff(const bcf1_t *b);
+ extern int bcf_p1_get_M(bcf_p1aux_t *b);
+
+ extern gzFile bcf_p1_fp_lk;
bcf_t *bp, *bout = 0;
bcf1_t *b, *blast;
memset(&vc, 0, sizeof(viewconf_t));
vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1;
memset(qcnt, 0, 8 * 256);
- while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:")) >= 0) {
+ while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) {
switch (c) {
case '1': vc.n1 = atoi(optarg); break;
- case 'l': vc.bed = bed_read(optarg); break;
+ /* -l: fail only when the BED file could not be read; without the braces the return ran unconditionally */
+ case 'l': vc.bed = bed_read(optarg); if (!vc.bed) { fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; } break;
case 'D': vc.fn_dict = strdup(optarg); break;
case 'F': vc.flag |= VC_FIX_PL; break;
case 'N': vc.flag |= VC_ACGT_ONLY; break;
case 'C': vc.min_lrt = atof(optarg); break;
case 'X': vc.min_perm_p = atof(optarg); break;
case 'd': vc.min_smpl_frac = atof(optarg); break;
+ case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break;
case 's': vc.subsam = read_samples(optarg, &vc.n_sub);
vc.ploidy = calloc(vc.n_sub + 1, 1);
for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1];
vc.sublist = calloc(vc.n_sub, sizeof(int));
hout = bcf_hdr_subsam(hin, vc.n_sub, vc.subsam, vc.sublist);
}
- if (vc.flag & VC_CALL) write_header(hout);
+ write_header(hout); // always print the header
vcf_hdr_write(bout, hout);
}
if (vc.flag & VC_CALL) {
}
}
}
+ if (bcf_p1_fp_lk && p1) {
+ int32_t M = bcf_p1_get_M(p1);
+ gzwrite(bcf_p1_fp_lk, &M, 4);
+ }
while (vcf_read(bp, hin, b) > 0) {
int is_indel, cons_llr = -1;
int64_t cons_gt = -1;
int i;
for (i = 0; i < 9; ++i) em[i] = -1.;
}
- if ( !(vc.flag&VC_KEEPALT) && vc.flag&VC_CALL && vc.min_ma_lrt>=0 )
+ if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 )
{
bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions
int gts = call_multiallelic_gt(b, p1, vc.min_ma_lrt, vc.flag&VC_VARONLY);
}
else if (vc.flag & VC_CALL) { // call variants
bcf_p1rst_t pr;
- int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr);
+ int calret;
+ gzwrite(bcf_p1_fp_lk, &b->tid, 4);
+ gzwrite(bcf_p1_fp_lk, &b->pos, 4);
+ gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double));
+ calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr);
if (n_processed % 100000 == 0) {
fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed);
bcf_p1_dump_afs(p1);
} else bcf_fix_gt(b);
vcf_write(bout, hout, b);
}
+
+ if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk);
if (vc.prior_file) free(vc.prior_file);
if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);
if (hin != hout) bcf_hdr_destroy(hout);
if (fpidx == 0) {
fprintf(stderr, "[bcf_idx_build2] fail to create the index file.\n");
free(fnidx);
+ bcf_idx_destroy(idx);
return -1;
}
bcf_idx_save(idx, fpidx);
#include <errno.h>
#include <assert.h>
#include <limits.h>
+#include <zlib.h>
#include "prob1.h"
#include "kstring.h"
#define MC_EM_EPS 1e-5
#define MC_DEF_INDEL 0.15
+gzFile bcf_p1_fp_lk;
+
unsigned char seq_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
return ma;
}
+int bcf_p1_get_M(bcf_p1aux_t *b) { return b->M; }
+
int bcf_p1_set_n1(bcf_p1aux_t *b, int n1)
{
if (n1 == 0 || n1 >= b->n) return -1;
}
}
if (z[0] != ma->z) memcpy(ma->z, z[0], sizeof(double) * (ma->M + 1));
+ if (bcf_p1_fp_lk)
+ gzwrite(bcf_p1_fp_lk, ma->z, sizeof(double) * (ma->M + 1));
}
static void mc_cal_y(bcf_p1aux_t *ma)
memset(&smpl, 0, sizeof(kstring_t));
while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) {
if (v->line.l < 2) continue;
- if (v->line.s[0] != '#') return 0; // no sample line
+ if (v->line.s[0] != '#') {
+ free(meta.s);
+ free(smpl.s);
+ free(h);
+ return 0; // no sample line
+ }
if (v->line.s[0] == '#' && v->line.s[1] == '#') {
kputsn(v->line.s, v->line.l, &meta); kputc('\n', &meta);
} else if (v->line.s[0] == '#') {
/* Contact: Heng Li <lh3@sanger.ac.uk> */
/*
+ 2012-12-11 (0.1.4):
+
+ * Defined __ks_insertsort_##name as static to compile with C99.
+
2008-11-16 (0.1.4):
* Fixed a bug in introsort() that happens in rare cases.
tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
} \
} \
- inline void __ks_insertsort_##name(type_t *s, type_t *t) \
+ static inline void __ks_insertsort_##name(type_t *s, type_t *t) \
{ \
type_t *i, *j, swap_tmp; \
for (i = s + 1; i < t; ++i) \
lib:
bamcheck:bamcheck.o
- $(CC) $(CFLAGS) -o $@ bamcheck.o -lm -lz -L.. -lbam -lpthread
+ $(CC) $(CFLAGS) -o $@ bamcheck.o -L.. -lm -lbam -lpthread -lz
bamcheck.o:bamcheck.c ../faidx.h ../khash.h ../sam.h ../razf.h
$(CC) $(CFLAGS) -c -I.. -o $@ bamcheck.c
uint64_t total_len_dup;
uint64_t nreads_1st;
uint64_t nreads_2nd;
+ uint64_t nreads_filtered;
uint64_t nreads_dup;
uint64_t nreads_unmapped;
uint64_t nreads_unpaired;
uint64_t nreads_paired;
+ uint64_t nreads_anomalous;
uint64_t nreads_mq0;
uint64_t nbases_mapped;
uint64_t nbases_mapped_cigar;
uint64_t nbases_trimmed; // bwa trimmed bases
uint64_t nmismatches;
+ uint64_t nreads_QCfailed, nreads_secondary;
// GC-depth related data
uint32_t ngcd, igcd; // The maximum number of GC depth bins and index of the current bin
{
uint8_t qual = quals[iread] + 1;
if ( qual>=stats->nquals )
- error("TODO: quality too high %d>=%d\n", quals[iread],stats->nquals);
+ error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line));
int idx = is_fwd ? icycle : read_len-icycle-1;
if ( idx>stats->max_len )
int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len);
if ( n <= stats->igcd )
- error("Uh: n=%d igcd=%d\n", n,stats->igcd );
+ error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n");
- if ( n >= stats->ngcd )
+ if ( n > stats->ngcd )
{
stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t));
if ( !stats->gcd )
if ( k == kh_end(stats->rg_hash) ) return;
}
if ( stats->flag_require && (bam_line->core.flag & stats->flag_require)!=stats->flag_require )
+ {
+ stats->nreads_filtered++;
return;
+ }
if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) )
+ {
+ stats->nreads_filtered++;
return;
-
+ }
if ( !is_in_regions(bam_line,stats) )
return;
+ if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen )
+ return;
+
+ if ( bam_line->core.flag & BAM_FQCFAIL ) stats->nreads_QCfailed++;
+ if ( bam_line->core.flag & BAM_FSECONDARY ) stats->nreads_secondary++;
int seq_len = bam_line->core.l_qseq;
if ( !seq_len ) return;
- if ( stats->filter_readlen!=-1 && seq_len!=stats->filter_readlen ) return;
+
if ( seq_len >= stats->nbases )
realloc_buffers(stats,seq_len);
if ( stats->max_len<seq_len )
{
uint8_t qual = bam_quals[ reverse ? seq_len-i-1 : i];
if ( qual>=stats->nquals )
- error("TODO: quality too high %d>=%d\n", quals[i],stats->nquals);
+ error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals,stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line));
if ( qual>stats->max_qual )
stats->max_qual = qual;
count_indels(stats,bam_line);
- // The insert size is tricky, because for long inserts the libraries are
- // prepared differently and the pairs point in other direction. BWA does
- // not set the paired flag for them. Similar thing is true also for 454
- // reads. Therefore, do the insert size stats for all mapped reads.
- int32_t isize = bam_line->core.isize;
- if ( isize<0 ) isize = -isize;
- if ( IS_PAIRED(bam_line) && isize!=0 )
+ if ( !IS_PAIRED(bam_line) )
+ stats->nreads_unpaired++;
+ else
{
stats->nreads_paired++;
- if ( isize >= stats->nisize )
- isize=stats->nisize-1;
- int pos_fst = bam_line->core.mpos - bam_line->core.pos;
- int is_fst = IS_READ1(bam_line) ? 1 : -1;
- int is_fwd = IS_REVERSE(bam_line) ? -1 : 1;
- int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+ if ( bam_line->core.tid!=bam_line->core.mtid )
+ stats->nreads_anomalous++;
- if ( is_fwd*is_mfwd>0 )
- stats->isize_other[isize]++;
- else if ( is_fst*pos_fst>0 )
- {
- if ( is_fst*is_fwd>0 )
- stats->isize_inward[isize]++;
- else
- stats->isize_outward[isize]++;
- }
- else if ( is_fst*pos_fst<0 )
+ // The insert size is tricky, because for long inserts the libraries are
+ // prepared differently and the pairs point in other direction. BWA does
+ // not set the paired flag for them. Similar thing is true also for 454
+ // reads. Mates mapped to different chromosomes have isize==0.
+ int32_t isize = bam_line->core.isize;
+ if ( isize<0 ) isize = -isize;
+ if ( isize >= stats->nisize )
+ isize = stats->nisize-1;
+ if ( isize>0 || bam_line->core.tid==bam_line->core.mtid )
{
- if ( is_fst*is_fwd>0 )
- stats->isize_outward[isize]++;
- else
- stats->isize_inward[isize]++;
+ int pos_fst = bam_line->core.mpos - bam_line->core.pos;
+ int is_fst = IS_READ1(bam_line) ? 1 : -1;
+ int is_fwd = IS_REVERSE(bam_line) ? -1 : 1;
+ int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+
+ if ( is_fwd*is_mfwd>0 )
+ stats->isize_other[isize]++;
+ else if ( is_fst*pos_fst>0 )
+ {
+ if ( is_fst*is_fwd>0 )
+ stats->isize_inward[isize]++;
+ else
+ stats->isize_outward[isize]++;
+ }
+ else if ( is_fst*pos_fst<0 )
+ {
+ if ( is_fst*is_fwd>0 )
+ stats->isize_outward[isize]++;
+ else
+ stats->isize_inward[isize]++;
+ }
}
}
- else
- stats->nreads_unpaired++;
// Number of mismatches
uint8_t *nm = bam_aux_get(bam_line,"NM");
// Calculate average insert size and standard deviation (from the main bulk data only)
int isize, ibulk=0;
uint64_t nisize=0, nisize_inward=0, nisize_outward=0, nisize_other=0;
- for (isize=1; isize<stats->nisize; isize++)
+ for (isize=0; isize<stats->nisize; isize++)
{
// Each pair was counted twice
stats->isize_inward[isize] *= 0.5;
}
double bulk=0, avg_isize=0, sd_isize=0;
- for (isize=1; isize<stats->nisize; isize++)
+ for (isize=0; isize<stats->nisize; isize++)
{
bulk += stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize];
avg_isize += isize * (stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]);
printf(" %s",stats->argv[i]);
printf("\n");
printf("# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n");
+ printf("SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd));
+ printf("SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered);
printf("SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd));
printf("SN\tis paired:\t%d\n", stats->nreads_1st&&stats->nreads_2nd ? 1 : 0);
printf("SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0);
printf("SN\treads paired:\t%ld\n", (long)stats->nreads_paired);
printf("SN\treads duplicated:\t%ld\n", (long)stats->nreads_dup);
printf("SN\treads MQ0:\t%ld\n", (long)stats->nreads_mq0);
+ printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
+ printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
printf("SN\ttotal length:\t%ld\n", (long)stats->total_len);
printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped);
printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar);
printf("SN\tinward oriented pairs:\t%ld\n", (long)nisize_inward);
printf("SN\toutward oriented pairs:\t%ld\n", (long)nisize_outward);
printf("SN\tpairs with other orientation:\t%ld\n", (long)nisize_other);
+ printf("SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2);
int ibase,iqual;
if ( stats->max_len<stats->nbases ) stats->max_len++;
printf("GCC\t%d\t%.2f\t%.2f\t%.2f\t%.2f\n", ibase,100.*ptr[0]/sum,100.*ptr[1]/sum,100.*ptr[2]/sum,100.*ptr[3]/sum);
}
printf("# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: pairs total, inward oriented pairs, outward oriented pairs, other pairs\n");
- for (isize=1; isize<ibulk; isize++)
+ for (isize=0; isize<ibulk; isize++)
printf("IS\t%d\t%ld\t%ld\t%ld\t%ld\n", isize, (long)(stats->isize_inward[isize]+stats->isize_outward[isize]+stats->isize_other[isize]),
(long)stats->isize_inward[isize], (long)stats->isize_outward[isize], (long)stats->isize_other[isize]);
}
printf("# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.\n");
- printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]);
+ if ( stats->cov[0] )
+ printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]);
int icov;
for (icov=1; icov<stats->ncov-1; icov++)
- printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]);
- printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]);
-
+ if ( stats->cov[icov] )
+ printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]);
+ if ( stats->cov[stats->ncov-1] )
+ printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]);
// Calculate average GC content, then sort by GC and depth
printf("# GC-depth. Use `grep ^GCD | cut -f 2-` to extract this part. The columns are: GC%%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile\n");
printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n");
printf(" -f, --required-flag <int> Required flag, 0 for unset [0]\n");
printf(" -F, --filtering-flag <int> Filtering flag, 0 for unset [0]\n");
- printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n");
+ printf(" --GC-depth <float,float> Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n");
printf(" -h, --help This help message\n");
printf(" -i, --insert-size <int> Maximum insert size [8000]\n");
printf(" -I, --id <string> Include only listed read group or sample name\n");
stats_t *stats = calloc(1,sizeof(stats_t));
stats->ngc = 200;
- stats->nquals = 95;
+ stats->nquals = 256;
stats->nbases = 300;
stats->nisize = 8000;
stats->max_len = 30;
stats->max_qual = 40;
stats->isize_main_bulk = 0.99; // There are always outliers at the far end
stats->gcd_bin_size = 20e3;
- stats->gcd_ref_size = 3e9;
+ stats->gcd_ref_size = 4.2e9;
stats->rseq_pos = -1;
- stats->tid = stats->gcd_pos = stats->igcd = -1;
+ stats->tid = stats->gcd_pos = -1;
+ stats->igcd = 0;
stats->is_sorted = 1;
stats->cov_min = 1;
stats->cov_max = 1000;
while (*to && *to=='\t') to++;
if ( to-from != 1 ) {
debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
+ free(hline);
return 0;
}
from = to;
.TP
.B sort
-samtools sort [-no] [-m maxMem] <in.bam> <out.prefix>
+samtools sort [-nof] [-m maxMem] <in.bam> <out.prefix>
Sort alignments by leftmost coordinates. File
.I <out.prefix>.bam
.B -n
Sort by read names rather than by chromosomal coordinates
.TP
+.B -f
+Use
+.I <out.prefix>
+as the full output path and do not append
+.I .bam
+suffix.
+.TP
.BI -m \ INT
Approximately the maximum required memory. [500000000]
.RE