From: Petr Danecek Date: Tue, 12 Mar 2013 13:25:17 +0000 (+0000) Subject: Merge remote branch 'remotes/master/master' into fb-annots X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=commitdiff_plain;h=60e0a8467ddbd0b89f15d201dcfe10c8796552b2;hp=6842e4470dcbd381d0893690b7d07344fd08e831 Merge remote branch 'remotes/master/master' into fb-annots --- diff --git a/Makefile b/Makefile index dbd90af..2f51bfc 100644 --- a/Makefile +++ b/Makefile @@ -9,7 +9,8 @@ LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \ AOBJS= bam_tview.o bam_plcmd.o sam_view.o \ bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \ bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \ - cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o + cut_target.o phase.o bam2depth.o padding.o bedcov.o bamshuf.o \ + bam_tview_curses.o bam_tview_html.o PROG= samtools INCLUDES= -I. SUBDIRS= . bcftools misc @@ -46,10 +47,10 @@ samtools:lib-recur $(AOBJS) $(CC) $(CFLAGS) -o $@ $(AOBJS) $(LDFLAGS) libbam.a -Lbcftools -lbcf $(LIBPATH) $(LIBCURSES) -lm -lz -lpthread razip:razip.o razf.o $(KNETFILE_O) - $(CC) $(CFLAGS) -o $@ razf.o razip.o $(KNETFILE_O) -lz + $(CC) $(CFLAGS) -o $@ $^ -lz bgzip:bgzip.o bgzf.o $(KNETFILE_O) - $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(KNETFILE_O) -lz -lpthread + $(CC) $(CFLAGS) -o $@ $^ -lz -lpthread bgzf.o:bgzf.c bgzf.h $(CC) -c $(CFLAGS) $(DFLAGS) -DBGZF_CACHE $(INCLUDES) bgzf.c -o $@ @@ -62,7 +63,9 @@ bam_pileup.o:bam.h razf.h ksort.h bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h bam_lpileup.o:bam.h ksort.h -bam_tview.o:bam.h faidx.h +bam_tview.o:bam.h faidx.h bam_tview.h +bam_tview_curses.o:bam.h faidx.h bam_tview.h +bam_tview_html.o:bam.h faidx.h bam_tview.h bam_sort.o:bam.h ksort.h razf.h bam_md.o:bam.h faidx.h sam_header.o:sam_header.h khash.h diff --git a/bam2depth.c b/bam2depth.c index 87a4c5b..02311ef 100644 --- a/bam2depth.c +++ b/bam2depth.c @@ -32,37 +32,58 @@ static int read_bam(void *data, bam1_t *b) // read level filters better go here return ret; } +int read_file_list(const char *file_list,int *n,char **argv[]); + #ifdef _MAIN_BAM2DEPTH int main(int argc, char *argv[]) #else int main_depth(int argc, char *argv[]) #endif { - int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0; + int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0, min_len = 0, nfiles; const bam_pileup1_t **plp; char *reg = 0; // specified region void *bed = 0; // BED data structure + char *file_list = NULL, **fn = NULL; bam_header_t *h = 0; // BAM header of the 1st input aux_t **data; bam_mplp_t mplp; // parse the command line - while ((n = getopt(argc, argv, "r:b:q:Q:l:")) >= 0) { + while ((n = getopt(argc, argv, "r:b:q:Q:l:f:")) >= 0) { switch (n) { case 'l': min_len = atoi(optarg); break; // minimum query length case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now case 'q': baseQ = atoi(optarg); break; // base quality threshold case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold + case 'f': file_list = optarg; break; } } - if (optind == argc) { - fprintf(stderr, "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-l minQLen] [-b in.bed] [...]\n"); + if (optind == argc && !file_list) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: samtools depth [options] in1.bam [in2.bam [...]]\n"); + fprintf(stderr, "Options:\n"); + fprintf(stderr, " -b list of positions or regions\n"); + fprintf(stderr, " -f list of input BAM filenames, one per line [null]\n"); + fprintf(stderr, " -l minQLen\n"); + fprintf(stderr, " -q base quality threshold\n"); + fprintf(stderr, " -Q mapping quality threshold\n"); + fprintf(stderr, " -r region\n"); + fprintf(stderr, "\n"); return 1; } // initialize the auxiliary data structures - n = argc - optind; // the number of BAMs on the command line + if (file_list) + { + if ( read_file_list(file_list,&nfiles,&fn) ) return 1; + n = nfiles; + argv = fn; + optind = 0; + } + else + n = argc - optind; // the number of BAMs on the command line data = calloc(n, sizeof(void*)); // data[i] for the i-th input beg = 0; end = 1<<30; tid = -1; // set the default region for (i = 0; i < n; ++i) { @@ -113,5 +134,10 @@ int main_depth(int argc, char *argv[]) } free(data); free(reg); if (bed) bed_destroy(bed); + if ( file_list ) + { + for (i=0; i #include #include +#include +#include #include "sam.h" #include "faidx.h" #include "kstring.h" @@ -80,6 +82,7 @@ int bed_overlap(const void *_h, const char *chr, int beg, int end); typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag; + int rflag_require, rflag_filter; int openQ, extQ, tandemQ, min_support; // for indels double min_frac; // for indels char *reg, *pl_list; @@ -117,6 +120,8 @@ static int mplp_func(void *data, bam1_t *b) skip = 1; continue; } + if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; } + if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; } if (ma->conf->bed) { // test overlap skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))); if (skip) continue; @@ -397,11 +402,15 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) } #define MAX_PATH_LEN 1024 -static int read_file_list(const char *file_list,int *n,char **argv[]) +int read_file_list(const char *file_list,int *n,char **argv[]) { char buf[MAX_PATH_LEN]; - int len, nfiles; - char **files; + int len, nfiles = 0; + char **files = NULL; + struct stat sb; + + *n = 0; + *argv = NULL; FILE *fh = fopen(file_list,"r"); if ( !fh ) @@ -410,28 +419,33 @@ static int read_file_list(const char *file_list,int *n,char **argv[]) return 1; } - // Speed is not an issue here, determine the number of files by reading the file twice - nfiles = 0; - while ( fgets(buf,MAX_PATH_LEN,fh) ) nfiles++; - - if ( fseek(fh, 0L, SEEK_SET) ) - { - fprintf(stderr,"%s: %s\n", file_list,strerror(errno)); - return 1; - } - files = calloc(nfiles,sizeof(char*)); nfiles = 0; while ( fgets(buf,MAX_PATH_LEN,fh) ) { + // allow empty lines and trailing spaces len = strlen(buf); while ( len>0 && isspace(buf[len-1]) ) len--; if ( !len ) continue; - files[nfiles] = malloc(sizeof(char)*(len+1)); - strncpy(files[nfiles],buf,len); - files[nfiles][len] = 0; + // check sanity of the file list + buf[len] = 0; + if (stat(buf, &sb) != 0) + { + // no such file, check if it is safe to print its name + int i, safe_to_print = 1; + for (i=0; i= 0) { + static struct option lopts[] = + { + {"rf",1,0,1}, // require flag + {"ff",1,0,2}, // filter flag + {0,0,0,0} + }; + while ((c = getopt_long(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsV1:2:",lopts,NULL)) >= 0) { switch (c) { + case 1 : mplp.rflag_require = strtol(optarg,0,0); break; + case 2 : mplp.rflag_filter = strtol(optarg,0,0); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; @@ -517,7 +539,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A count anomalous read pairs\n"); fprintf(stderr, " -B disable BAQ computation\n"); - fprintf(stderr, " -b FILE list of input BAM files [null]\n"); + fprintf(stderr, " -b FILE list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n"); fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth); fprintf(stderr, " -E recalculate extended BAQ on the fly thus ignoring existing BQs\n"); @@ -529,6 +551,8 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -R ignore RG tags\n"); fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ); + fprintf(stderr, " --rf INT required flags: skip reads with mask bits unset []\n"); + fprintf(stderr, " --ff INT filter flags: skip reads with mask bits set []\n"); fprintf(stderr, "\nOutput options:\n\n"); fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n"); fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n"); diff --git a/bam_sort.c b/bam_sort.c index 7d00cd1..c46bce3 100644 --- a/bam_sort.c +++ b/bam_sort.c @@ -434,12 +434,13 @@ static int sort_blocks(int n_files, size_t k, bam1_p *buf, const char *prefix, c @param prefix prefix of the output and the temporary files; upon sucessess, prefix.bam will be written. @param max_mem approxiate maximum memory (very inaccurate) + @param full_path the given output path is the full path and not just the prefix @discussion It may create multiple temporary subalignment files and then merge them by calling bam_merge_core(). This function is NOT thread safe. */ -void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level) +void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t _max_mem, int is_stdout, int n_threads, int level, int full_path) { int ret, i, n_files = 0; size_t mem, max_k, k, max_mem; @@ -447,6 +448,8 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size bamFile fp; bam1_t *b, **buf; char *fnout = 0; + char const *suffix = ".bam"; + if (full_path) suffix += 4; if (n_threads < 2) n_threads = 1; g_is_by_qname = is_by_qname; @@ -489,7 +492,7 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size // output file name fnout = calloc(strlen(prefix) + 20, 1); if (is_stdout) sprintf(fnout, "-"); - else sprintf(fnout, "%s.bam", prefix); + else sprintf(fnout, "%s%s", prefix, suffix); // write the final output if (n_files == 0) { // a single block char mode[8]; @@ -504,7 +507,7 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size fns = (char**)calloc(n_files, sizeof(char*)); for (i = 0; i < n_files; ++i) { fns[i] = (char*)calloc(strlen(prefix) + 20, 1); - sprintf(fns[i], "%s.%.4d.bam", prefix, i); + sprintf(fns[i], "%s.%.4d%s", prefix, i, suffix); } bam_merge_core2(is_by_qname, fnout, 0, n_files, fns, 0, 0, n_threads, level); for (i = 0; i < n_files; ++i) { @@ -527,15 +530,16 @@ void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem) { - bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1); + bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0, 0, -1, 0); } int bam_sort(int argc, char *argv[]) { size_t max_mem = 768<<20; // 512MB - int c, is_by_qname = 0, is_stdout = 0, n_threads = 0, level = -1; - while ((c = getopt(argc, argv, "nom:@:l:")) >= 0) { + int c, is_by_qname = 0, is_stdout = 0, n_threads = 0, level = -1, full_path = 0; + while ((c = getopt(argc, argv, "fnom:@:l:")) >= 0) { switch (c) { + case 'f': full_path = 1; break; case 'o': is_stdout = 1; break; case 'n': is_by_qname = 1; break; case 'm': { @@ -554,6 +558,7 @@ int bam_sort(int argc, char *argv[]) fprintf(stderr, "\n"); fprintf(stderr, "Usage: samtools sort [options] \n\n"); fprintf(stderr, "Options: -n sort by read name\n"); + fprintf(stderr, " -f use as full file name instead of prefix\n"); fprintf(stderr, " -o final output to stdout\n"); fprintf(stderr, " -l INT compression level, from 0 to 9 [-1]\n"); fprintf(stderr, " -@ INT number of sorting and compression threads [1]\n"); @@ -561,6 +566,6 @@ int bam_sort(int argc, char *argv[]) fprintf(stderr, "\n"); return 1; } - bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level); + bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout, n_threads, level, full_path); return 0; } diff --git a/bam_tview.c b/bam_tview.c index f8a1f2c..06d5e33 100644 --- a/bam_tview.c +++ b/bam_tview.c @@ -1,67 +1,81 @@ -#undef _HAVE_CURSES - -#if _CURSES_LIB == 0 -#elif _CURSES_LIB == 1 -#include -#ifndef NCURSES_VERSION -#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled" -#else -#define _HAVE_CURSES -#endif -#elif _CURSES_LIB == 2 -#include -#define _HAVE_CURSES -#else -#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled" -#endif - -#ifdef _HAVE_CURSES -#include #include -#include -#include -#include -#include "bam.h" -#include "faidx.h" -#include "bam2bcf.h" -#include "sam_header.h" -#include "khash.h" - -KHASH_MAP_INIT_STR(kh_rg, const char *) +#include "bam_tview.h" -char bam_aux_getCEi(bam1_t *b, int i); -char bam_aux_getCSi(bam1_t *b, int i); -char bam_aux_getCQi(bam1_t *b, int i); - -#define TV_MIN_ALNROW 2 -#define TV_MAX_GOTO 40 -#define TV_LOW_MAPQ 10 +int base_tv_init(tview_t* tv,const char *fn, const char *fn_fa, const char *samples) + { + assert(tv!=NULL); + assert(fn!=NULL); + tv->mrow = 24; tv->mcol = 80; + tv->color_for = TV_COLOR_MAPQ; + tv->is_dot = 1; + + tv->fp = bam_open(fn, "r"); + if(tv->fp==0) + { + fprintf(stderr,"bam_open %s. %s\n", fn,fn_fa); + exit(EXIT_FAILURE); + } + bgzf_set_cache_size(tv->fp, 8 * 1024 *1024); + assert(tv->fp); + + tv->header = bam_header_read(tv->fp); + if(tv->header==0) + { + fprintf(stderr,"Cannot read '%s'.\n", fn); + exit(EXIT_FAILURE); + } + tv->idx = bam_index_load(fn); + if (tv->idx == 0) + { + fprintf(stderr,"Cannot read index for '%s'.\n", fn); + exit(EXIT_FAILURE); + } + tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv); + if (fn_fa) tv->fai = fai_load(fn_fa); + tv->bca = bcf_call_init(0.83, 13); + tv->ins = 1; -#define TV_COLOR_MAPQ 0 -#define TV_COLOR_BASEQ 1 -#define TV_COLOR_NUCL 2 -#define TV_COLOR_COL 3 -#define TV_COLOR_COLQ 4 + if ( samples ) + { + if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text); + void *iter = tv->header->dict; + const char *key, *val; + int n = 0; + tv->rg_hash = kh_init(kh_rg); + while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) ) + { + if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) ) + { + khiter_t k = kh_get(kh_rg, tv->rg_hash, key); + if ( k != kh_end(tv->rg_hash) ) continue; + int ret; + k = kh_put(kh_rg, tv->rg_hash, key, &ret); + kh_value(tv->rg_hash, k) = val; + n++; + } + } + if ( !n ) + { + fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples); + exit(EXIT_FAILURE); + } + } -#define TV_BASE_NUCL 0 -#define TV_BASE_COLOR_SPACE 1 + return 0; + } -typedef struct { - int mrow, mcol; - WINDOW *wgoto, *whelp; - bam_index_t *idx; - bam_lplbuf_t *lplbuf; - bam_header_t *header; - bamFile fp; - int curr_tid, left_pos; - faidx_t *fai; - bcf_callaux_t *bca; +void base_tv_destroy(tview_t* tv) + { + bam_lplbuf_destroy(tv->lplbuf); + bcf_call_destroy(tv->bca); + bam_index_destroy(tv->idx); + if (tv->fai) fai_destroy(tv->fai); + free(tv->ref); + bam_header_destroy(tv->header); + bam_close(tv->fp); + } - int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name; - char *ref; - khash_t(kh_rg) *rg_hash; -} tview_t; int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) { @@ -73,11 +87,11 @@ int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void // print referece rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'; for (i = tv->last_pos + 1; i < pos; ++i) { - if (i%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", i+1); + if (i%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", i+1); c = tv->ref? tv->ref[i - tv->left_pos] : 'N'; - mvaddch(1, tv->ccol++, c); + tv->my_mvaddch(tv,1, tv->ccol++, c); } - if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1); + if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); { // call consensus bcf_callret1_t bcr; int qsum[4], a1, a2, tmp; @@ -95,15 +109,15 @@ int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void else if (p[2] < p[1] && p[2] < p[0]) call = (1<my_underline(tv); c = ",ACMGRSVTWYHKDBN"[call>>16&0xf]; i = (call&0xffff)/10+1; if (i > 4) i = 4; - attr |= COLOR_PAIR(i); + attr |= tv->my_colorpair(tv,i); if (c == toupper(rb)) c = '.'; - attron(attr); - mvaddch(2, tv->ccol, c); - attroff(attr); + tv->my_attron(tv,attr); + tv->my_mvaddch(tv,2, tv->ccol, c); + tv->my_attroff(tv,attr); if(tv->ins) { // calculate maximum insert for (i = 0; i < n; ++i) { @@ -153,18 +167,18 @@ int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void int x; attr = 0; if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR)) - || (p->b->core.flag & BAM_FSECONDARY)) attr |= A_UNDERLINE; + || (p->b->core.flag & BAM_FSECONDARY)) attr |= tv->my_underline(tv); if (tv->color_for == TV_COLOR_BASEQ) { x = bam1_qual(p->b)[p->qpos]/10 + 1; if (x > 4) x = 4; - attr |= COLOR_PAIR(x); + attr |= tv->my_colorpair(tv,x); } else if (tv->color_for == TV_COLOR_MAPQ) { x = p->b->core.qual/10 + 1; if (x > 4) x = 4; - attr |= COLOR_PAIR(x); + attr |= tv->my_colorpair(tv,x); } else if (tv->color_for == TV_COLOR_NUCL) { x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5; - attr |= COLOR_PAIR(x); + attr |= tv->my_colorpair(tv,x); } else if(tv->color_for == TV_COLOR_COL) { x = 0; switch(bam_aux_getCSi(p->b, p->qpos)) { @@ -176,109 +190,33 @@ int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break; } x+=5; - attr |= COLOR_PAIR(x); + attr |= tv->my_colorpair(tv,x); } else if(tv->color_for == TV_COLOR_COLQ) { x = bam_aux_getCQi(p->b, p->qpos); if(0 == x) x = bam1_qual(p->b)[p->qpos]; x = x/10 + 1; if (x > 4) x = 4; - attr |= COLOR_PAIR(x); + attr |= tv->my_colorpair(tv,x); } - attron(attr); - mvaddch(row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c)); - attroff(attr); + tv->my_attron(tv,attr); + tv->my_mvaddch(tv,row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c)); + tv->my_attroff(tv,attr); } } c = j? '*' : rb; if (c == '*') { - attr = COLOR_PAIR(8); - attron(attr); - mvaddch(1, tv->ccol++, c); - attroff(attr); - } else mvaddch(1, tv->ccol++, c); + attr = tv->my_colorpair(tv,8); + tv->my_attron(tv,attr); + tv->my_mvaddch(tv,1, tv->ccol++, c); + tv->my_attroff(tv,attr); + } else tv->my_mvaddch(tv,1, tv->ccol++, c); } tv->last_pos = pos; return 0; } -tview_t *tv_init(const char *fn, const char *fn_fa, char *samples) -{ - tview_t *tv = (tview_t*)calloc(1, sizeof(tview_t)); - tv->is_dot = 1; - tv->fp = bam_open(fn, "r"); - bgzf_set_cache_size(tv->fp, 8 * 1024 *1024); - assert(tv->fp); - tv->header = bam_header_read(tv->fp); - tv->idx = bam_index_load(fn); - if (tv->idx == 0) exit(1); - tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv); - if (fn_fa) tv->fai = fai_load(fn_fa); - tv->bca = bcf_call_init(0.83, 13); - tv->ins = 1; - if ( samples ) - { - if ( !tv->header->dict ) tv->header->dict = sam_header_parse2(tv->header->text); - void *iter = tv->header->dict; - const char *key, *val; - int n = 0; - tv->rg_hash = kh_init(kh_rg); - while ( (iter = sam_header2key_val(iter, "RG","ID","SM", &key, &val)) ) - { - if ( !strcmp(samples,key) || (val && !strcmp(samples,val)) ) - { - khiter_t k = kh_get(kh_rg, tv->rg_hash, key); - if ( k != kh_end(tv->rg_hash) ) continue; - int ret; - k = kh_put(kh_rg, tv->rg_hash, key, &ret); - kh_value(tv->rg_hash, k) = val; - n++; - } - } - if ( !n ) - { - fprintf(stderr,"The sample or read group \"%s\" not present.\n", samples); - exit(-1); - } - } - - initscr(); - keypad(stdscr, TRUE); - clear(); - noecho(); - cbreak(); - tv->mrow = 24; tv->mcol = 80; - getmaxyx(stdscr, tv->mrow, tv->mcol); - tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5); - tv->whelp = newwin(29, 40, 5, 5); - tv->color_for = TV_COLOR_MAPQ; - start_color(); - init_pair(1, COLOR_BLUE, COLOR_BLACK); - init_pair(2, COLOR_GREEN, COLOR_BLACK); - init_pair(3, COLOR_YELLOW, COLOR_BLACK); - init_pair(4, COLOR_WHITE, COLOR_BLACK); - init_pair(5, COLOR_GREEN, COLOR_BLACK); - init_pair(6, COLOR_CYAN, COLOR_BLACK); - init_pair(7, COLOR_YELLOW, COLOR_BLACK); - init_pair(8, COLOR_RED, COLOR_BLACK); - init_pair(9, COLOR_BLUE, COLOR_BLACK); - return tv; -} - -void tv_destroy(tview_t *tv) -{ - delwin(tv->wgoto); delwin(tv->whelp); - endwin(); - bam_lplbuf_destroy(tv->lplbuf); - bcf_call_destroy(tv->bca); - bam_index_destroy(tv->idx); - if (tv->fai) fai_destroy(tv->fai); - free(tv->ref); - bam_header_destroy(tv->header); - bam_close(tv->fp); - free(tv); -} int tv_fetch_func(const bam1_t *b, void *data) { @@ -302,10 +240,11 @@ int tv_fetch_func(const bam1_t *b, void *data) return 0; } -int tv_draw_aln(tview_t *tv, int tid, int pos) -{ +int base_draw_aln(tview_t *tv, int tid, int pos) + { + assert(tv!=NULL); // reset - clear(); + tv->my_clear(tv); tv->curr_tid = tid; tv->left_pos = pos; tv->last_pos = tv->left_pos - 1; tv->ccol = 0; @@ -313,7 +252,10 @@ int tv_draw_aln(tview_t *tv, int tid, int pos) if (tv->fai) { char *str; if (tv->ref) free(tv->ref); + assert(tv->curr_tid>=0); + str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1); + assert(str!=NULL); sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol); tv->ref = fai_fetch(tv->fai, str, &tv->l_ref); free(str); @@ -325,144 +267,26 @@ int tv_draw_aln(tview_t *tv, int tid, int pos) while (tv->ccol < tv->mcol) { int pos = tv->last_pos + 1; - if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1); - mvaddch(1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'); + if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) tv->my_mvprintw(tv,0, tv->ccol, "%-d", pos+1); + tv->my_mvaddch(tv,1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N'); ++tv->last_pos; } return 0; } -static void tv_win_goto(tview_t *tv, int *tid, int *pos) -{ - char str[256], *p; - int i, l = 0; - wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+'); - mvwprintw(tv->wgoto, 1, 2, "Goto: "); - for (;;) { - int c = wgetch(tv->wgoto); - wrefresh(tv->wgoto); - if (c == KEY_BACKSPACE || c == '\010' || c == '\177') { - if(l > 0) --l; - } else if (c == KEY_ENTER || c == '\012' || c == '\015') { - int _tid = -1, _beg, _end; - if (str[0] == '=') { - _beg = strtol(str+1, &p, 10) - 1; - if (_beg > 0) { - *pos = _beg; - return; - } - } else { - bam_parse_region(tv->header, str, &_tid, &_beg, &_end); - if (_tid >= 0) { - *tid = _tid; *pos = _beg; - return; - } - } - } else if (isgraph(c)) { - if (l < TV_MAX_GOTO) str[l++] = c; - } else if (c == '\027') l = 0; - else if (c == '\033') return; - str[l] = '\0'; - for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' '); - mvwprintw(tv->wgoto, 1, 8, "%s", str); - } -} -static void tv_win_help(tview_t *tv) { - int r = 1; - WINDOW *win = tv->whelp; - wborder(win, '|', '|', '-', '-', '+', '+', '+', '+'); - mvwprintw(win, r++, 2, " -=- Help -=- "); - r++; - mvwprintw(win, r++, 2, "? This window"); - mvwprintw(win, r++, 2, "Arrows Small scroll movement"); - mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement"); - mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement"); - mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left"); - mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right"); - mvwprintw(win, r++, 2, "space Scroll one screen"); - mvwprintw(win, r++, 2, "backspace Scroll back one screen"); - mvwprintw(win, r++, 2, "g Go to specific location"); - mvwprintw(win, r++, 2, "m Color for mapping qual"); - mvwprintw(win, r++, 2, "n Color for nucleotide"); - mvwprintw(win, r++, 2, "b Color for base quality"); - mvwprintw(win, r++, 2, "c Color for cs color"); - mvwprintw(win, r++, 2, "z Color for cs qual"); - mvwprintw(win, r++, 2, ". Toggle on/off dot view"); - mvwprintw(win, r++, 2, "s Toggle on/off ref skip"); - mvwprintw(win, r++, 2, "r Toggle on/off rd name"); - mvwprintw(win, r++, 2, "N Turn on nt view"); - mvwprintw(win, r++, 2, "C Turn on cs view"); - mvwprintw(win, r++, 2, "i Toggle on/off ins"); - mvwprintw(win, r++, 2, "q Exit"); - r++; - mvwprintw(win, r++, 2, "Underline: Secondary or orphan"); - mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19"); - mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30"); - wrefresh(win); - wgetch(win); -} -void tv_loop(tview_t *tv) -{ - int tid, pos; - tid = tv->curr_tid; pos = tv->left_pos; - while (1) { - int c = getch(); - switch (c) { - case '?': tv_win_help(tv); break; - case '\033': - case 'q': goto end_loop; - case '/': - case 'g': tv_win_goto(tv, &tid, &pos); break; - case 'm': tv->color_for = TV_COLOR_MAPQ; break; - case 'b': tv->color_for = TV_COLOR_BASEQ; break; - case 'n': tv->color_for = TV_COLOR_NUCL; break; - case 'c': tv->color_for = TV_COLOR_COL; break; - case 'z': tv->color_for = TV_COLOR_COLQ; break; - case 's': tv->no_skip = !tv->no_skip; break; - case 'r': tv->show_name = !tv->show_name; break; - case KEY_LEFT: - case 'h': --pos; break; - case KEY_RIGHT: - case 'l': ++pos; break; - case KEY_SLEFT: - case 'H': pos -= 20; break; - case KEY_SRIGHT: - case 'L': pos += 20; break; - case '.': tv->is_dot = !tv->is_dot; break; - case 'N': tv->base_for = TV_BASE_NUCL; break; - case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break; - case 'i': tv->ins = !tv->ins; break; - case '\010': pos -= 1000; break; - case '\014': pos += 1000; break; - case ' ': pos += tv->mcol; break; - case KEY_UP: - case 'j': --tv->row_shift; break; - case KEY_DOWN: - case 'k': ++tv->row_shift; break; - case KEY_BACKSPACE: - case '\177': pos -= tv->mcol; break; - case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break; - default: continue; - } - if (pos < 0) pos = 0; - if (tv->row_shift < 0) tv->row_shift = 0; - tv_draw_aln(tv, tid, pos); - } -end_loop: - return; -} -void error(const char *format, ...) +static void error(const char *format, ...) { if ( !format ) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bamtk tview [options] [ref.fasta]\n"); fprintf(stderr, "Options:\n"); + fprintf(stderr, " -d display output as (H)tml or (C)urses or (T)ext \n"); fprintf(stderr, " -p chr:pos go directly to this position\n"); - fprintf(stderr, " -s STR display only reads from this sample or grou\n"); + fprintf(stderr, " -s STR display only reads from this sample or group\n"); fprintf(stderr, "\n\n"); } else @@ -475,38 +299,70 @@ void error(const char *format, ...) exit(-1); } +enum dipsay_mode {display_ncurses,display_html,display_text}; +extern tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples); +extern tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples); +extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples); int bam_tview_main(int argc, char *argv[]) -{ - tview_t *tv; + { + int view_mode=display_ncurses; + tview_t* tv=NULL; char *samples=NULL, *position=NULL; int c; - while ((c = getopt(argc, argv, "s:p:")) >= 0) { + while ((c = getopt(argc, argv, "s:p:d:")) >= 0) { switch (c) { case 's': samples=optarg; break; case 'p': position=optarg; break; + case 'd': + { + switch(optarg[0]) + { + case 'H': case 'h': view_mode=display_html;break; + case 'T': case 't': view_mode=display_text;break; + case 'C': case 'c': view_mode=display_ncurses;break; + default: view_mode=display_ncurses;break; + } + break; + } default: error(NULL); } } if (argc==optind) error(NULL); - tv = tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); - if ( position ) - { - int _tid = -1, _beg, _end; - bam_parse_region(tv->header, position, &_tid, &_beg, &_end); - if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; } - } - tv_draw_aln(tv, tv->curr_tid, tv->left_pos); - tv_loop(tv); - tv_destroy(tv); - return 0; -} -#else // #ifdef _HAVE_CURSES -#include -#warning "No curses library is available; tview is disabled." -int bam_tview_main(int argc, char *argv[]) -{ - fprintf(stderr, "[bam_tview_main] The ncurses library is unavailable; tview is not compiled.\n"); - return 1; -} -#endif // #ifdef _HAVE_CURSES + + switch(view_mode) + { + case display_ncurses: + { + tv = curses_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); + break; + } + case display_text: + { + tv = text_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); + break; + } + case display_html: + { + tv = html_tv_init(argv[optind], (optind+1>=argc)? 0 : argv[optind+1], samples); + break; + } + } + if(tv==NULL) + { + error("cannot create view"); + return EXIT_FAILURE; + } + + if ( position ) + { + int _tid = -1, _beg, _end; + bam_parse_region(tv->header, position, &_tid, &_beg, &_end); + if (_tid >= 0) { tv->curr_tid = _tid; tv->left_pos = _beg; } + } + tv->my_drawaln(tv, tv->curr_tid, tv->left_pos); + tv->my_loop(tv); + tv->my_destroy(tv); + + return EXIT_SUCCESS; + } diff --git a/bam_tview.h b/bam_tview.h new file mode 100644 index 0000000..80f0464 --- /dev/null +++ b/bam_tview.h @@ -0,0 +1,75 @@ +#ifndef BAM_TVIEW_H +#define BAM_TVIEW_H + +#include +#include +#include +#include +#include +#include +#include "bam.h" +#include "faidx.h" +#include "bam2bcf.h" +#include "sam_header.h" +#include "khash.h" + +KHASH_MAP_INIT_STR(kh_rg, const char *) + +typedef struct AbstractTview { + int mrow, mcol; + + bam_index_t *idx; + bam_lplbuf_t *lplbuf; + bam_header_t *header; + bamFile fp; + int curr_tid, left_pos; + faidx_t *fai; + bcf_callaux_t *bca; + + int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name; + char *ref; + khash_t(kh_rg) *rg_hash; + /* callbacks */ + void (*my_destroy)(struct AbstractTview* ); + void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); + void (*my_mvaddch)(struct AbstractTview*,int,int,int); + void (*my_attron)(struct AbstractTview*,int); + void (*my_attroff)(struct AbstractTview*,int); + void (*my_clear)(struct AbstractTview*); + int (*my_colorpair)(struct AbstractTview*,int); + int (*my_drawaln)(struct AbstractTview*,int,int); + int (*my_loop)(struct AbstractTview*); + int (*my_underline)(struct AbstractTview*); +} tview_t; + + +char bam_aux_getCEi(bam1_t *b, int i); +char bam_aux_getCSi(bam1_t *b, int i); +char bam_aux_getCQi(bam1_t *b, int i); + +#define TV_MIN_ALNROW 2 +#define TV_MAX_GOTO 40 +#define TV_LOW_MAPQ 10 + +#define TV_COLOR_MAPQ 0 +#define TV_COLOR_BASEQ 1 +#define TV_COLOR_NUCL 2 +#define TV_COLOR_COL 3 +#define TV_COLOR_COLQ 4 + +#define TV_BASE_NUCL 0 +#define TV_BASE_COLOR_SPACE 1 + +int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data); +int base_tv_init(tview_t*,const char *fn, const char *fn_fa, const char *samples); +void base_tv_destroy(tview_t*); +int base_draw_aln(tview_t *tv, int tid, int pos); + +typedef struct Tixel + { + int ch; + int attributes; + }tixel_t; + +#endif + diff --git a/bam_tview_curses.c b/bam_tview_curses.c new file mode 100644 index 0000000..4fdd1fb --- /dev/null +++ b/bam_tview_curses.c @@ -0,0 +1,297 @@ +#undef _HAVE_CURSES + +#if _CURSES_LIB == 0 +#elif _CURSES_LIB == 1 +#include +#ifndef NCURSES_VERSION +#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled" +#else +#define _HAVE_CURSES +#endif +#elif _CURSES_LIB == 2 +#include +#define _HAVE_CURSES +#else +#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled" +#endif + + +#include "bam_tview.h" + +#ifdef _HAVE_CURSES + + + +typedef struct CursesTview { + tview_t view; + WINDOW *wgoto, *whelp; + } curses_tview_t; + + + + +#define FROM_TV(ptr) ((curses_tview_t*)ptr) + +static void curses_destroy(tview_t* base) + { + curses_tview_t* tv=(curses_tview_t*)base; + + + delwin(tv->wgoto); delwin(tv->whelp); + endwin(); + + base_tv_destroy(base); + + free(tv); + } + +/* + void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); + void (*my_)(struct AbstractTview*,int,int,int); + void (*my_attron)(struct AbstractTview*,int); + void (*my_attroff)(struct AbstractTview*,int); + void (*my_clear)(struct AbstractTview*); + int (*my_colorpair)(struct AbstractTview*,int); +*/ + +static void curses_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...) + { + unsigned int size=tv->mcol+2; + char* str=malloc(size); + if(str==0) exit(EXIT_FAILURE); + va_list argptr; + va_start(argptr, fmt); + vsnprintf(str,size, fmt, argptr); + va_end(argptr); + mvprintw(y,x,str); + free(str); + } + +static void curses_mvaddch(struct AbstractTview* tv,int y,int x,int ch) + { + mvaddch(y,x,ch); + } + +static void curses_attron(struct AbstractTview* tv,int flag) + { + attron(flag); + } +static void curses_attroff(struct AbstractTview* tv,int flag) + { + attroff(flag); + } +static void curses_clear(struct AbstractTview* tv) + { + clear(); + } + +static int curses_colorpair(struct AbstractTview* tv,int flag) + { + return COLOR_PAIR(flag); + } + +static int curses_drawaln(struct AbstractTview* tv, int tid, int pos) + { + return base_draw_aln(tv, tid, pos); + } + + + +static void tv_win_goto(curses_tview_t *tv, int *tid, int *pos) + { + char str[256], *p; + int i, l = 0; + tview_t *base=(tview_t*)tv; + wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+'); + mvwprintw(tv->wgoto, 1, 2, "Goto: "); + for (;;) { + int c = wgetch(tv->wgoto); + wrefresh(tv->wgoto); + if (c == KEY_BACKSPACE || c == '\010' || c == '\177') { + if(l > 0) --l; + } else if (c == KEY_ENTER || c == '\012' || c == '\015') { + int _tid = -1, _beg, _end; + if (str[0] == '=') { + _beg = strtol(str+1, &p, 10) - 1; + if (_beg > 0) { + *pos = _beg; + return; + } + } else { + bam_parse_region(base->header, str, &_tid, &_beg, &_end); + if (_tid >= 0) { + *tid = _tid; *pos = _beg; + return; + } + } + } else if (isgraph(c)) { + if (l < TV_MAX_GOTO) str[l++] = c; + } else if (c == '\027') l = 0; + else if (c == '\033') return; + str[l] = '\0'; + for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' '); + mvwprintw(tv->wgoto, 1, 8, "%s", str); + } +} + + + + +static void tv_win_help(curses_tview_t *tv) { + int r = 1; + tview_t* base=(tview_t*)base; + WINDOW *win = tv->whelp; + wborder(win, '|', '|', '-', '-', '+', '+', '+', '+'); + mvwprintw(win, r++, 2, " -=- Help -=- "); + r++; + mvwprintw(win, r++, 2, "? This window"); + mvwprintw(win, r++, 2, "Arrows Small scroll movement"); + mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement"); + mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement"); + mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left"); + mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right"); + mvwprintw(win, r++, 2, "space Scroll one screen"); + mvwprintw(win, r++, 2, "backspace Scroll back one screen"); + mvwprintw(win, r++, 2, "g Go to specific location"); + mvwprintw(win, r++, 2, "m Color for mapping qual"); + mvwprintw(win, r++, 2, "n Color for nucleotide"); + mvwprintw(win, r++, 2, "b Color for base quality"); + mvwprintw(win, r++, 2, "c Color for cs color"); + mvwprintw(win, r++, 2, "z Color for cs qual"); + mvwprintw(win, r++, 2, ". Toggle on/off dot view"); + mvwprintw(win, r++, 2, "s Toggle on/off ref skip"); + mvwprintw(win, r++, 2, "r Toggle on/off rd name"); + mvwprintw(win, r++, 2, "N Turn on nt view"); + mvwprintw(win, r++, 2, "C Turn on cs view"); + mvwprintw(win, r++, 2, "i Toggle on/off ins"); + mvwprintw(win, r++, 2, "q Exit"); + r++; + mvwprintw(win, r++, 2, "Underline: Secondary or orphan"); + mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19"); + mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30"); + wrefresh(win); + wgetch(win); +} + +static int curses_underline(tview_t* tv) + { + return A_UNDERLINE; + } + +static int curses_loop(tview_t* tv) + { + int tid, pos; + curses_tview_t *CTV=(curses_tview_t *)tv; + tid = tv->curr_tid; pos = tv->left_pos; + while (1) { + int c = getch(); + switch (c) { + case '?': tv_win_help(CTV); break; + case '\033': + case 'q': goto end_loop; + case '/': + case 'g': tv_win_goto(CTV, &tid, &pos); break; + case 'm': tv->color_for = TV_COLOR_MAPQ; break; + case 'b': tv->color_for = TV_COLOR_BASEQ; break; + case 'n': tv->color_for = TV_COLOR_NUCL; break; + case 'c': tv->color_for = TV_COLOR_COL; break; + case 'z': tv->color_for = TV_COLOR_COLQ; break; + case 's': tv->no_skip = !tv->no_skip; break; + case 'r': tv->show_name = !tv->show_name; break; + case KEY_LEFT: + case 'h': --pos; break; + case KEY_RIGHT: + case 'l': ++pos; break; + case KEY_SLEFT: + case 'H': pos -= 20; break; + case KEY_SRIGHT: + case 'L': pos += 20; break; + case '.': tv->is_dot = !tv->is_dot; break; + case 'N': tv->base_for = TV_BASE_NUCL; break; + case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break; + case 'i': tv->ins = !tv->ins; break; + case '\010': pos -= 1000; break; + case '\014': pos += 1000; break; + case ' ': pos += tv->mcol; break; + case KEY_UP: + case 'j': --tv->row_shift; break; + case KEY_DOWN: + case 'k': ++tv->row_shift; break; + case KEY_BACKSPACE: + case '\177': pos -= tv->mcol; break; + case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break; + default: continue; + } + if (pos < 0) pos = 0; + if (tv->row_shift < 0) tv->row_shift = 0; + tv->my_drawaln(tv, tid, pos); + } +end_loop: + return 0; +} + + + + +tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples) + { + curses_tview_t *tv = (curses_tview_t*)calloc(1, sizeof(curses_tview_t)); + tview_t* base=(tview_t*)tv; + if(tv==0) + { + fprintf(stderr,"Calloc failed\n"); + return 0; + } + + base_tv_init(base,fn,fn_fa,samples); + /* initialize callbacks */ +#define SET_CALLBACK(fun) base->my_##fun=curses_##fun; + SET_CALLBACK(destroy); + SET_CALLBACK(mvprintw); + SET_CALLBACK(mvaddch); + SET_CALLBACK(attron); + SET_CALLBACK(attroff); + SET_CALLBACK(clear); + SET_CALLBACK(colorpair); + SET_CALLBACK(drawaln); + SET_CALLBACK(loop); + SET_CALLBACK(underline); +#undef SET_CALLBACK + + initscr(); + keypad(stdscr, TRUE); + clear(); + noecho(); + cbreak(); + + getmaxyx(stdscr, base->mrow, base->mcol); + tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5); + tv->whelp = newwin(29, 40, 5, 5); + + start_color(); + init_pair(1, COLOR_BLUE, COLOR_BLACK); + init_pair(2, COLOR_GREEN, COLOR_BLACK); + init_pair(3, COLOR_YELLOW, COLOR_BLACK); + init_pair(4, COLOR_WHITE, COLOR_BLACK); + init_pair(5, COLOR_GREEN, COLOR_BLACK); + init_pair(6, COLOR_CYAN, COLOR_BLACK); + init_pair(7, COLOR_YELLOW, COLOR_BLACK); + init_pair(8, COLOR_RED, COLOR_BLACK); + init_pair(9, COLOR_BLUE, COLOR_BLACK); + return base; + } + + +#else // #ifdef _HAVE_CURSES +#include +#warning "No curses library is available; tview with curses is disabled." + +extern tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples); + +tview_t* curses_tv_init(const char *fn, const char *fn_fa, const char *samples) + { + return text_tv_init(fn,fn_fa,samples); + } +#endif // #ifdef _HAVE_CURSES + + diff --git a/bam_tview_html.c b/bam_tview_html.c new file mode 100644 index 0000000..f52b4c3 --- /dev/null +++ b/bam_tview_html.c @@ -0,0 +1,349 @@ +#include +#include "bam_tview.h" + +#define UNDERLINE_FLAG 10 + +typedef struct HtmlTview { + tview_t view; + int row_count; + tixel_t** screen; + FILE* out; + int attributes;/* color... */ + } html_tview_t; + +#define FROM_TV(ptr) ((html_tview_t*)ptr) + +static void html_destroy(tview_t* base) + { + int i; + html_tview_t* tv=(html_tview_t*)base; + if(tv->screen!=NULL) + { + for(i=0;i< tv->row_count;++i) free(tv->screen[i]); + free(tv->screen); + } + base_tv_destroy(base); + free(tv); + } + +/* + void (*my_mvprintw)(struct AbstractTview* ,int,int,const char*,...); + void (*my_)(struct AbstractTview*,int,int,int); + void (*my_attron)(struct AbstractTview*,int); + void (*my_attroff)(struct AbstractTview*,int); + void (*my_clear)(struct AbstractTview*); + int (*my_colorpair)(struct AbstractTview*,int); +*/ + +static void html_mvprintw(struct AbstractTview* tv,int y ,int x,const char* fmt,...) + { + int i,nchars=0; + unsigned int size=tv->mcol+2; + char* str=malloc(size); + if(str==0) exit(EXIT_FAILURE); + va_list argptr; + va_start(argptr, fmt); + nchars=vsnprintf(str,size, fmt, argptr); + va_end(argptr); + + for(i=0;i< nchars;++i) + { + tv->my_mvaddch(tv,y,x+i,str[i]); + } + free(str); + } + +static void html_mvaddch(struct AbstractTview* tv,int y,int x,int ch) + { + tixel_t* row=NULL; + html_tview_t* ptr=FROM_TV(tv); + if( x >= tv->mcol ) return; //out of screen + while(ptr->row_count<=y) + { + int x; + row=(tixel_t*)calloc(tv->mcol,sizeof(tixel_t)); + if(row==0) exit(EXIT_FAILURE); + for(x=0;xmcol;++x) {row[x].ch=' ';row[x].attributes=0;} + ptr->screen=(tixel_t**)realloc(ptr->screen,sizeof(tixel_t*)*(ptr->row_count+1)); + ptr->screen[ptr->row_count++]=row; + } + row=ptr->screen[y]; + row[x].ch=ch; + row[x].attributes=ptr->attributes; + } + +static void html_attron(struct AbstractTview* tv,int flag) + { + html_tview_t* ptr=FROM_TV(tv); + ptr->attributes |= flag; + + + } + +static void html_attroff(struct AbstractTview* tv,int flag) + { + html_tview_t* ptr=FROM_TV(tv); + ptr->attributes &= ~(flag); + } + +static void html_clear(struct AbstractTview* tv) + { + html_tview_t* ptr=FROM_TV(tv); + if(ptr->screen!=NULL) + { + int i; + for(i=0;i< ptr->row_count;++i) free(ptr->screen[i]); + free(ptr->screen); + ptr->screen=NULL; + } + ptr->row_count=0; + ptr->attributes=0; + } + +static int html_colorpair(struct AbstractTview* tv,int flag) + { + return (1 << (flag)); + } + +static int html_drawaln(struct AbstractTview* tv, int tid, int pos) + { + int y,x; + html_tview_t* ptr=FROM_TV(tv); + html_clear(tv); + base_draw_aln(tv, tid, pos); + fputs("",ptr->out); + fprintf(ptr->out,"%s:%d", + tv->header->target_name[tid], + pos+1 + ); + //style + + fputs("",ptr->out); + + fputs("",ptr->out); + + fprintf(ptr->out,"
%s:%d
", + tv->header->target_name[tid], + pos+1 + ); + + fputs("
",ptr->out);
+    for(y=0;y< ptr->row_count;++y)
+    	{
+    	
+    	for(x=0;x< tv->mcol;++x)
+	    	{
+	    	
+		
+		if(x== 0 || ptr->screen[y][x].attributes != ptr->screen[y][x-1].attributes)
+	    		{
+	    		int css=0;
+			fprintf(ptr->out,"1) fprintf(stderr,"css=%d pow2=%d vs %d\n",css,(1 << (css)),ptr->screen[y][x].attributes);
+	    			if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0)
+	    				{
+	    				
+	    				fprintf(ptr->out," class='tviewc%s%d'",
+	    					(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)) )!=0?"u":""),
+	    					css);
+	    				break;
+	    				}
+	    			++css;
+	    			}
+
+
+	    		fputs(">",ptr->out);
+	    		}
+		
+		int ch=ptr->screen[y][x].ch;
+		switch(ch)
+			{
+			case '<': fputs("<",ptr->out);break;
+			case '>': fputs(">",ptr->out);break;
+			case '&': fputs("&",ptr->out);break;
+			default: fputc(ch,ptr->out); break;
+			}
+	    	
+	    	
+	    	if(x+1 == tv->mcol  || ptr->screen[y][x].attributes!=ptr->screen[y][x+1].attributes)
+	    		{
+	    		fputs("",ptr->out);
+	    		}
+	    	}
+    	if(y+1 < ptr->row_count) fputs("
",ptr->out); + } + fputs("
",ptr->out); + return 0; + } + + +#define ANSI_COLOR_RED "\x1b[31m" +#define ANSI_COLOR_GREEN "\x1b[32m" +#define ANSI_COLOR_YELLOW "\x1b[33m" +#define ANSI_COLOR_BLUE "\x1b[34m" +#define ANSI_COLOR_MAGENTA "\x1b[35m" +#define ANSI_COLOR_CYAN "\x1b[36m" +#define ANSI_COLOR_BLACK "\x1b[0m" +#define ANSI_COLOR_RESET ANSI_COLOR_BLACK + +#define ANSI_UNDERLINE_SET "\033[4m" +#define ANSI_UNDERLINE_UNSET "\033[0m" + +static int text_drawaln(struct AbstractTview* tv, int tid, int pos) + { + int y,x; + html_tview_t* ptr=FROM_TV(tv); + html_clear(tv); + base_draw_aln(tv, tid, pos); + int is_term= isatty(fileno(ptr->out)); + + for(y=0;y< ptr->row_count;++y) + { + for(x=0;x< tv->mcol;++x) + { + if(is_term) + { + int css=0; + while(css<32) + { + if(( (ptr->screen[y][x].attributes) & (1 << (css)))!=0) + { + break; + } + ++css; + } + switch(css) + { + //CSS(0, "black"); + case 1: fputs(ANSI_COLOR_BLUE,ptr->out); break; + case 2: fputs(ANSI_COLOR_GREEN,ptr->out); break; + case 3: fputs(ANSI_COLOR_YELLOW,ptr->out); break; + //CSS(4, "black"); + case 5: fputs(ANSI_COLOR_GREEN,ptr->out); break; + case 6: fputs(ANSI_COLOR_CYAN,ptr->out); break; + case 7: fputs(ANSI_COLOR_YELLOW,ptr->out); break; + case 8: fputs(ANSI_COLOR_RED,ptr->out); break; + case 9: fputs(ANSI_COLOR_BLUE,ptr->out); break; + default:break; + } + if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0) + { + fputs(ANSI_UNDERLINE_SET,ptr->out); + } + + } + + + int ch=ptr->screen[y][x].ch; + + fputc(ch,ptr->out); + if(is_term) + { + fputs(ANSI_COLOR_RESET,ptr->out); + if(( (ptr->screen[y][x].attributes) & (1 << (UNDERLINE_FLAG)))!=0) + { + fputs(ANSI_UNDERLINE_UNSET,ptr->out); + } + } + } + fputc('\n',ptr->out); + } + return 0; + } + + +static int html_loop(tview_t* tv) + { + //tv->my_drawaln(tv, tv->curr_tid, tv->left_pos); + return 0; + } + +static int html_underline(tview_t* tv) + { + return (1 << UNDERLINE_FLAG); + } + +/* +static void init_pair(html_tview_t *tv,int id_ge_1, const char* pen, const char* paper) + { + + } +*/ + +tview_t* html_tv_init(const char *fn, const char *fn_fa, const char *samples) + { + char* colstr=getenv("COLUMNS"); + html_tview_t *tv = (html_tview_t*)calloc(1, sizeof(html_tview_t)); + tview_t* base=(tview_t*)tv; + if(tv==0) + { + fprintf(stderr,"Calloc failed\n"); + return 0; + } + tv->row_count=0; + tv->screen=NULL; + tv->out=stdout; + tv->attributes=0; + base_tv_init(base,fn,fn_fa,samples); + /* initialize callbacks */ +#define SET_CALLBACK(fun) base->my_##fun=html_##fun; + SET_CALLBACK(destroy); + SET_CALLBACK(mvprintw); + SET_CALLBACK(mvaddch); + SET_CALLBACK(attron); + SET_CALLBACK(attroff); + SET_CALLBACK(clear); + SET_CALLBACK(colorpair); + SET_CALLBACK(drawaln); + SET_CALLBACK(loop); + SET_CALLBACK(underline); +#undef SET_CALLBACK + + + if(colstr!=0) + { + base->mcol=atoi(colstr); + if(base->mcol<10) base->mcol=80; + } + base->mrow=99999; + +/* + init_pair(tv,1, "blue", "white"); + init_pair(tv,2, "green", "white"); + init_pair(tv,3, "yellow", "white"); + init_pair(tv,4, "white", "white"); + init_pair(tv,5, "green", "white"); + init_pair(tv,6, "cyan", "white"); + init_pair(tv,7, "yellow", "white"); + init_pair(tv,8, "red", "white"); + init_pair(tv,9, "blue", "white"); + */ + return base; + } + + +tview_t* text_tv_init(const char *fn, const char *fn_fa, const char *samples) + { + tview_t* tv=html_tv_init(fn,fn_fa,samples); + tv->my_drawaln=text_drawaln; + return tv; + } + diff --git a/bcftools/call1.c b/bcftools/call1.c index eb58498..18805a0 100644 --- a/bcftools/call1.c +++ b/bcftools/call1.c @@ -328,6 +328,9 @@ int bcfview(int argc, char *argv[]) extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt); extern int bcf_pair_call(const bcf1_t *b); extern int bcf_min_diff(const bcf1_t *b); + extern int bcf_p1_get_M(bcf_p1aux_t *b); + + extern gzFile bcf_p1_fp_lk; bcf_t *bp, *bout = 0; bcf1_t *b, *blast; @@ -343,10 +346,10 @@ int bcfview(int argc, char *argv[]) memset(&vc, 0, sizeof(viewconf_t)); vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1; vc.min_ma_lrt = -1; memset(qcnt, 0, 8 * 256); - while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:")) >= 0) { + while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Ywm:K:")) >= 0) { switch (c) { case '1': vc.n1 = atoi(optarg); break; - case 'l': vc.bed = bed_read(optarg); break; + case 'l': vc.bed = bed_read(optarg); if (!vc.bed) fprintf(stderr,"Could not read \"%s\"\n", optarg); return 1; break; case 'D': vc.fn_dict = strdup(optarg); break; case 'F': vc.flag |= VC_FIX_PL; break; case 'N': vc.flag |= VC_ACGT_ONLY; break; @@ -373,6 +376,7 @@ int bcfview(int argc, char *argv[]) case 'C': vc.min_lrt = atof(optarg); break; case 'X': vc.min_perm_p = atof(optarg); break; case 'd': vc.min_smpl_frac = atof(optarg); break; + case 'K': bcf_p1_fp_lk = gzopen(optarg, "w"); break; case 's': vc.subsam = read_samples(optarg, &vc.n_sub); vc.ploidy = calloc(vc.n_sub + 1, 1); for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1]; @@ -462,7 +466,7 @@ int bcfview(int argc, char *argv[]) vc.sublist = calloc(vc.n_sub, sizeof(int)); hout = bcf_hdr_subsam(hin, vc.n_sub, vc.subsam, vc.sublist); } - if (vc.flag & VC_CALL) write_header(hout); + write_header(hout); // always print the header vcf_hdr_write(bout, hout); } if (vc.flag & VC_CALL) { @@ -496,6 +500,10 @@ int bcfview(int argc, char *argv[]) } } } + if (bcf_p1_fp_lk && p1) { + int32_t M = bcf_p1_get_M(p1); + gzwrite(bcf_p1_fp_lk, &M, 4); + } while (vcf_read(bp, hin, b) > 0) { int is_indel, cons_llr = -1; int64_t cons_gt = -1; @@ -544,7 +552,7 @@ int bcfview(int argc, char *argv[]) int i; for (i = 0; i < 9; ++i) em[i] = -1.; } - if ( !(vc.flag&VC_KEEPALT) && vc.flag&VC_CALL && vc.min_ma_lrt>=0 ) + if ( !(vc.flag&VC_KEEPALT) && (vc.flag&VC_CALL) && vc.min_ma_lrt>=0 ) { bcf_p1_set_ploidy(b, p1); // could be improved: do this per site to allow pseudo-autosomal regions int gts = call_multiallelic_gt(b, p1, vc.min_ma_lrt, vc.flag&VC_VARONLY); @@ -552,7 +560,11 @@ int bcfview(int argc, char *argv[]) } else if (vc.flag & VC_CALL) { // call variants bcf_p1rst_t pr; - int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); + int calret; + gzwrite(bcf_p1_fp_lk, &b->tid, 4); + gzwrite(bcf_p1_fp_lk, &b->pos, 4); + gzwrite(bcf_p1_fp_lk, &em[0], sizeof(double)); + calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr); if (n_processed % 100000 == 0) { fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed); bcf_p1_dump_afs(p1); @@ -597,6 +609,8 @@ int bcfview(int argc, char *argv[]) } else bcf_fix_gt(b); vcf_write(bout, hout, b); } + + if (bcf_p1_fp_lk) gzclose(bcf_p1_fp_lk); if (vc.prior_file) free(vc.prior_file); if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1); if (hin != hout) bcf_hdr_destroy(hout); diff --git a/bcftools/index.c b/bcftools/index.c index 014856d..a7db24f 100644 --- a/bcftools/index.c +++ b/bcftools/index.c @@ -259,6 +259,7 @@ int bcf_idx_build2(const char *fn, const char *_fnidx) if (fpidx == 0) { fprintf(stderr, "[bcf_idx_build2] fail to create the index file.\n"); free(fnidx); + bcf_idx_destroy(idx); return -1; } bcf_idx_save(idx, fpidx); diff --git a/bcftools/prob1.c b/bcftools/prob1.c index d655722..f04cf08 100644 --- a/bcftools/prob1.c +++ b/bcftools/prob1.c @@ -5,6 +5,7 @@ #include #include #include +#include #include "prob1.h" #include "kstring.h" @@ -15,6 +16,8 @@ KSTREAM_INIT(gzFile, gzread, 16384) #define MC_EM_EPS 1e-5 #define MC_DEF_INDEL 0.15 +gzFile bcf_p1_fp_lk; + unsigned char seq_nt4_table[256] = { 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, @@ -165,6 +168,8 @@ bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy) return ma; } +int bcf_p1_get_M(bcf_p1aux_t *b) { return b->M; } + int bcf_p1_set_n1(bcf_p1aux_t *b, int n1) { if (n1 == 0 || n1 >= b->n) return -1; @@ -751,6 +756,8 @@ static void mc_cal_y_core(bcf_p1aux_t *ma, int beg) } } if (z[0] != ma->z) memcpy(ma->z, z[0], sizeof(double) * (ma->M + 1)); + if (bcf_p1_fp_lk) + gzwrite(bcf_p1_fp_lk, ma->z, sizeof(double) * (ma->M + 1)); } static void mc_cal_y(bcf_p1aux_t *ma) diff --git a/bcftools/vcf.c b/bcftools/vcf.c index bc11084..e8526a3 100644 --- a/bcftools/vcf.c +++ b/bcftools/vcf.c @@ -30,7 +30,12 @@ bcf_hdr_t *vcf_hdr_read(bcf_t *bp) memset(&smpl, 0, sizeof(kstring_t)); while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) { if (v->line.l < 2) continue; - if (v->line.s[0] != '#') return 0; // no sample line + if (v->line.s[0] != '#') { + free(meta.s); + free(smpl.s); + free(h); + return 0; // no sample line + } if (v->line.s[0] == '#' && v->line.s[1] == '#') { kputsn(v->line.s, v->line.l, &meta); kputc('\n', &meta); } else if (v->line.s[0] == '#') { diff --git a/ksort.h b/ksort.h index fa850ab..aa0bb93 100644 --- a/ksort.h +++ b/ksort.h @@ -26,6 +26,10 @@ /* Contact: Heng Li */ /* + 2012-12-11 (0.1.4): + + * Defined __ks_insertsort_##name as static to compile with C99. + 2008-11-16 (0.1.4): * Fixed a bug in introsort() that happens in rare cases. @@ -141,7 +145,7 @@ typedef struct { tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \ } \ } \ - inline void __ks_insertsort_##name(type_t *s, type_t *t) \ + static inline void __ks_insertsort_##name(type_t *s, type_t *t) \ { \ type_t *i, *j, swap_tmp; \ for (i = s + 1; i < t; ++i) \ diff --git a/misc/Makefile b/misc/Makefile index e1a5add..d36e7ac 100644 --- a/misc/Makefile +++ b/misc/Makefile @@ -28,7 +28,7 @@ lib-recur all-recur clean-recur cleanlocal-recur install-recur: lib: bamcheck:bamcheck.o - $(CC) $(CFLAGS) -o $@ bamcheck.o -lm -lz -L.. -lbam -lpthread + $(CC) $(CFLAGS) -o $@ bamcheck.o -L.. -lm -lbam -lpthread -lz bamcheck.o:bamcheck.c ../faidx.h ../khash.h ../sam.h ../razf.h $(CC) $(CFLAGS) -c -I.. -o $@ bamcheck.c diff --git a/misc/bamcheck.c b/misc/bamcheck.c index 532d105..352db21 100644 --- a/misc/bamcheck.c +++ b/misc/bamcheck.c @@ -116,15 +116,18 @@ typedef struct uint64_t total_len_dup; uint64_t nreads_1st; uint64_t nreads_2nd; + uint64_t nreads_filtered; uint64_t nreads_dup; uint64_t nreads_unmapped; uint64_t nreads_unpaired; uint64_t nreads_paired; + uint64_t nreads_anomalous; uint64_t nreads_mq0; uint64_t nbases_mapped; uint64_t nbases_mapped_cigar; uint64_t nbases_trimmed; // bwa trimmed bases uint64_t nmismatches; + uint64_t nreads_QCfailed, nreads_secondary; // GC-depth related data uint32_t ngcd, igcd; // The maximum number of GC depth bins and index of the current bin @@ -395,7 +398,7 @@ void count_mismatches_per_cycle(stats_t *stats,bam1_t *bam_line) { uint8_t qual = quals[iread] + 1; if ( qual>=stats->nquals ) - error("TODO: quality too high %d>=%d\n", quals[iread],stats->nquals); + error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals, stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); int idx = is_fwd ? icycle : read_len-icycle-1; if ( idx>stats->max_len ) @@ -515,9 +518,9 @@ void realloc_gcd_buffer(stats_t *stats, int seq_len) int n = 1 + stats->gcd_ref_size / (stats->gcd_bin_size - seq_len); if ( n <= stats->igcd ) - error("Uh: n=%d igcd=%d\n", n,stats->igcd ); + error("The --GC-depth bin size is too small or reference genome too big; please decrease the bin size or increase the reference length\n"); - if ( n >= stats->ngcd ) + if ( n > stats->ngcd ) { stats->gcd = realloc(stats->gcd, n*sizeof(gc_depth_t)); if ( !stats->gcd ) @@ -617,16 +620,26 @@ void collect_stats(bam1_t *bam_line, stats_t *stats) if ( k == kh_end(stats->rg_hash) ) return; } if ( stats->flag_require && (bam_line->core.flag & stats->flag_require)!=stats->flag_require ) + { + stats->nreads_filtered++; return; + } if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) ) + { + stats->nreads_filtered++; return; - + } if ( !is_in_regions(bam_line,stats) ) return; + if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen ) + return; + + if ( bam_line->core.flag & BAM_FQCFAIL ) stats->nreads_QCfailed++; + if ( bam_line->core.flag & BAM_FSECONDARY ) stats->nreads_secondary++; int seq_len = bam_line->core.l_qseq; if ( !seq_len ) return; - if ( stats->filter_readlen!=-1 && seq_len!=stats->filter_readlen ) return; + if ( seq_len >= stats->nbases ) realloc_buffers(stats,seq_len); if ( stats->max_len=stats->nquals ) - error("TODO: quality too high %d>=%d\n", quals[i],stats->nquals); + error("TODO: quality too high %d>=%d (%s %d %s)\n", qual,stats->nquals,stats->sam->header->target_name[bam_line->core.tid],bam_line->core.pos+1,bam1_qname(bam_line)); if ( qual>stats->max_qual ) stats->max_qual = qual; @@ -702,42 +715,48 @@ void collect_stats(bam1_t *bam_line, stats_t *stats) count_indels(stats,bam_line); - // The insert size is tricky, because for long inserts the libraries are - // prepared differently and the pairs point in other direction. BWA does - // not set the paired flag for them. Similar thing is true also for 454 - // reads. Therefore, do the insert size stats for all mapped reads. - int32_t isize = bam_line->core.isize; - if ( isize<0 ) isize = -isize; - if ( IS_PAIRED(bam_line) && isize!=0 ) + if ( !IS_PAIRED(bam_line) ) + stats->nreads_unpaired++; + else { stats->nreads_paired++; - if ( isize >= stats->nisize ) - isize=stats->nisize-1; - int pos_fst = bam_line->core.mpos - bam_line->core.pos; - int is_fst = IS_READ1(bam_line) ? 1 : -1; - int is_fwd = IS_REVERSE(bam_line) ? -1 : 1; - int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1; + if ( bam_line->core.tid!=bam_line->core.mtid ) + stats->nreads_anomalous++; - if ( is_fwd*is_mfwd>0 ) - stats->isize_other[isize]++; - else if ( is_fst*pos_fst>0 ) - { - if ( is_fst*is_fwd>0 ) - stats->isize_inward[isize]++; - else - stats->isize_outward[isize]++; - } - else if ( is_fst*pos_fst<0 ) + // The insert size is tricky, because for long inserts the libraries are + // prepared differently and the pairs point in other direction. BWA does + // not set the paired flag for them. Similar thing is true also for 454 + // reads. Mates mapped to different chromosomes have isize==0. + int32_t isize = bam_line->core.isize; + if ( isize<0 ) isize = -isize; + if ( isize >= stats->nisize ) + isize = stats->nisize-1; + if ( isize>0 || bam_line->core.tid==bam_line->core.mtid ) { - if ( is_fst*is_fwd>0 ) - stats->isize_outward[isize]++; - else - stats->isize_inward[isize]++; + int pos_fst = bam_line->core.mpos - bam_line->core.pos; + int is_fst = IS_READ1(bam_line) ? 1 : -1; + int is_fwd = IS_REVERSE(bam_line) ? -1 : 1; + int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1; + + if ( is_fwd*is_mfwd>0 ) + stats->isize_other[isize]++; + else if ( is_fst*pos_fst>0 ) + { + if ( is_fst*is_fwd>0 ) + stats->isize_inward[isize]++; + else + stats->isize_outward[isize]++; + } + else if ( is_fst*pos_fst<0 ) + { + if ( is_fst*is_fwd>0 ) + stats->isize_outward[isize]++; + else + stats->isize_inward[isize]++; + } } } - else - stats->nreads_unpaired++; // Number of mismatches uint8_t *nm = bam_aux_get(bam_line,"NM"); @@ -889,7 +908,7 @@ void output_stats(stats_t *stats) // Calculate average insert size and standard deviation (from the main bulk data only) int isize, ibulk=0; uint64_t nisize=0, nisize_inward=0, nisize_outward=0, nisize_other=0; - for (isize=1; isizenisize; isize++) + for (isize=0; isizenisize; isize++) { // Each pair was counted twice stats->isize_inward[isize] *= 0.5; @@ -903,7 +922,7 @@ void output_stats(stats_t *stats) } double bulk=0, avg_isize=0, sd_isize=0; - for (isize=1; isizenisize; isize++) + for (isize=0; isizenisize; isize++) { bulk += stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]; avg_isize += isize * (stats->isize_inward[isize] + stats->isize_outward[isize] + stats->isize_other[isize]); @@ -928,6 +947,8 @@ void output_stats(stats_t *stats) printf(" %s",stats->argv[i]); printf("\n"); printf("# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n"); + printf("SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd)); + printf("SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered); printf("SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd)); printf("SN\tis paired:\t%d\n", stats->nreads_1st&&stats->nreads_2nd ? 1 : 0); printf("SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0); @@ -939,6 +960,8 @@ void output_stats(stats_t *stats) printf("SN\treads paired:\t%ld\n", (long)stats->nreads_paired); printf("SN\treads duplicated:\t%ld\n", (long)stats->nreads_dup); printf("SN\treads MQ0:\t%ld\n", (long)stats->nreads_mq0); + printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed); + printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary); printf("SN\ttotal length:\t%ld\n", (long)stats->total_len); printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped); printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar); @@ -955,6 +978,7 @@ void output_stats(stats_t *stats) printf("SN\tinward oriented pairs:\t%ld\n", (long)nisize_inward); printf("SN\toutward oriented pairs:\t%ld\n", (long)nisize_outward); printf("SN\tpairs with other orientation:\t%ld\n", (long)nisize_other); + printf("SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2); int ibase,iqual; if ( stats->max_lennbases ) stats->max_len++; @@ -1021,7 +1045,7 @@ void output_stats(stats_t *stats) printf("GCC\t%d\t%.2f\t%.2f\t%.2f\t%.2f\n", ibase,100.*ptr[0]/sum,100.*ptr[1]/sum,100.*ptr[2]/sum,100.*ptr[3]/sum); } printf("# Insert sizes. Use `grep ^IS | cut -f 2-` to extract this part. The columns are: pairs total, inward oriented pairs, outward oriented pairs, other pairs\n"); - for (isize=1; isizeisize_inward[isize]+stats->isize_outward[isize]+stats->isize_other[isize]), (long)stats->isize_inward[isize], (long)stats->isize_outward[isize], (long)stats->isize_other[isize]); @@ -1050,12 +1074,14 @@ void output_stats(stats_t *stats) } printf("# Coverage distribution. Use `grep ^COV | cut -f 2-` to extract this part.\n"); - printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]); + if ( stats->cov[0] ) + printf("COV\t[<%d]\t%d\t%ld\n",stats->cov_min,stats->cov_min-1, (long)stats->cov[0]); int icov; for (icov=1; icovncov-1; icov++) - printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]); - printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]); - + if ( stats->cov[icov] ) + printf("COV\t[%d-%d]\t%d\t%ld\n",stats->cov_min + (icov-1)*stats->cov_step, stats->cov_min + icov*stats->cov_step-1,stats->cov_min + icov*stats->cov_step-1, (long)stats->cov[icov]); + if ( stats->cov[stats->ncov-1] ) + printf("COV\t[%d<]\t%d\t%ld\n",stats->cov_min + (stats->ncov-2)*stats->cov_step-1,stats->cov_min + (stats->ncov-2)*stats->cov_step-1, (long)stats->cov[stats->ncov-1]); // Calculate average GC content, then sort by GC and depth printf("# GC-depth. Use `grep ^GCD | cut -f 2-` to extract this part. The columns are: GC%%, unique sequence percentiles, 10th, 25th, 50th, 75th and 90th depth percentile\n"); @@ -1279,7 +1305,7 @@ void error(const char *format, ...) printf(" -d, --remove-dups Exlude from statistics reads marked as duplicates\n"); printf(" -f, --required-flag Required flag, 0 for unset [0]\n"); printf(" -F, --filtering-flag Filtering flag, 0 for unset [0]\n"); - printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,6e9]\n"); + printf(" --GC-depth Bin size for GC-depth graph and the maximum reference length [2e4,4.2e9]\n"); printf(" -h, --help This help message\n"); printf(" -i, --insert-size Maximum insert size [8000]\n"); printf(" -I, --id Include only listed read group or sample name\n"); @@ -1311,16 +1337,17 @@ int main(int argc, char *argv[]) stats_t *stats = calloc(1,sizeof(stats_t)); stats->ngc = 200; - stats->nquals = 95; + stats->nquals = 256; stats->nbases = 300; stats->nisize = 8000; stats->max_len = 30; stats->max_qual = 40; stats->isize_main_bulk = 0.99; // There are always outliers at the far end stats->gcd_bin_size = 20e3; - stats->gcd_ref_size = 3e9; + stats->gcd_ref_size = 4.2e9; stats->rseq_pos = -1; - stats->tid = stats->gcd_pos = stats->igcd = -1; + stats->tid = stats->gcd_pos = -1; + stats->igcd = 0; stats->is_sorted = 1; stats->cov_min = 1; stats->cov_max = 1000; diff --git a/sam_header.c b/sam_header.c index a1b5181..ddc2c38 100644 --- a/sam_header.c +++ b/sam_header.c @@ -366,6 +366,7 @@ static HeaderLine *sam_header_line_parse(const char *headerLine) while (*to && *to=='\t') to++; if ( to-from != 1 ) { debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from)); + free(hline); return 0; } from = to; diff --git a/samtools.1 b/samtools.1 index 4b3f75a..869feaa 100644 --- a/samtools.1 +++ b/samtools.1 @@ -353,7 +353,7 @@ which enables fast BAM concatenation. .TP .B sort -samtools sort [-no] [-m maxMem] +samtools sort [-nof] [-m maxMem] Sort alignments by leftmost coordinates. File .I .bam @@ -371,6 +371,13 @@ Output the final alignment to the standard output. .B -n Sort by read names rather than by chromosomal coordinates .TP +.B -f +Use +.I +as the full output path and do not append +.I .bam +suffix. +.TP .BI -m \ INT Approximately the maximum required memory. [500000000] .RE