From a2ef036000d869f35ec58e66e5f2a748cdc2d128 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 29 Jan 2009 12:33:23 +0000 Subject: [PATCH] * samtools-0.1.2-1 * added flagstat command --- Makefile | 2 +- bam_stat.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ bamtk.c | 5 +++- 3 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 bam_stat.c diff --git a/Makefile b/Makefile index aff7eaa..69ed702 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ CXXFLAGS= $(CFLAGS) DFLAGS= -D_IOLIB=2 -D_FILE_OFFSET_BITS=64 #-D_NO_RAZF #-D_NO_CURSES OBJS= bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \ razf.o bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o \ - bam_mate.o bam_rmdup.o glf.o + bam_mate.o bam_rmdup.o glf.o bam_stat.o PROG= razip bgzip samtools INCLUDES= LIBS= -lm -lz diff --git a/bam_stat.c b/bam_stat.c new file mode 100644 index 0000000..3f58686 --- /dev/null +++ b/bam_stat.c @@ -0,0 +1,73 @@ +#include "bam.h" + +typedef struct { + long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good; + long long n_sgltn, n_read1, n_read2; + long long n_qcfail, n_dup; + long long n_diffchr, n_diffhigh; +} bam_flagstat_t; + +#define flagstat_loop(s, c) do { \ + ++(s)->n_reads; \ + if ((c)->flag & BAM_FPAIRED) { \ + ++(s)->n_pair_all; \ + if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good; \ + if ((c)->flag & BAM_FREAD1) ++(s)->n_read1; \ + if ((c)->flag & BAM_FREAD2) ++(s)->n_read2; \ + if ((c)->flag & BAM_FMUNMAP) ++(s)->n_sgltn; \ + if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \ + ++(s)->n_pair_map; \ + if ((c)->mtid != (c)->tid) { \ + ++(s)->n_diffchr; \ + if ((c)->qual >= 5) ++(s)->n_diffhigh; \ + } \ + } \ + } \ + if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped; \ + if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail; \ + if ((c)->flag & BAM_FDUP) ++(s)->n_dup; \ + } while (0) + +bam_flagstat_t *bam_flagstat_core(bamFile fp) +{ + bam_flagstat_t *s; + bam1_t *b; + bam1_core_t *c; + s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t)); + b = bam_init1(); + c = &b->core; + while (bam_read1(fp, b) >= 0) { + flagstat_loop(s, c); + } + bam_destroy1(b); + return s; +} +int bam_flagstat(int argc, char *argv[]) +{ + bamFile fp; + bam_header_t *header; + bam_flagstat_t *s; + if (argc == optind) { + fprintf(stderr, "Usage: samtools flagstat \n"); + return 1; + } + fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); + assert(fp); + header = bam_header_read(fp); + s = bam_flagstat_core(fp); + printf("%lld in total\n", s->n_reads); + printf("%lld QC failure\n", s->n_qcfail); + printf("%lld duplicates\n", s->n_dup); + printf("%lld mapped\n", s->n_mapped); + printf("%lld paired in sequencing\n", s->n_pair_all); + printf("%lld read1\n", s->n_read1); + printf("%lld read2\n", s->n_read2); + printf("%lld properly paired\n", s->n_pair_good); + printf("%lld with both itself and its mate mapped\n", s->n_pair_map); + printf("%lld singletons\n", s->n_sgltn); + printf("%lld with its mate mapped to a different chr\n", s->n_diffchr); + free(s); + bam_header_destroy(header); + bam_close(fp); + return 0; +} diff --git a/bamtk.c b/bamtk.c index e162f07..0e73016 100644 --- a/bamtk.c +++ b/bamtk.c @@ -3,7 +3,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.2" +#define PACKAGE_VERSION "0.1.2-1" #endif int bam_taf2baf(int argc, char *argv[]); @@ -14,6 +14,7 @@ int bam_sort(int argc, char *argv[]); int bam_tview_main(int argc, char *argv[]); int bam_mating(int argc, char *argv[]); int bam_rmdup(int argc, char *argv[]); +int bam_flagstat(int argc, char *argv[]); int faidx_main(int argc, char *argv[]); int glf_view_main(int argc, char *argv[]); @@ -94,6 +95,7 @@ static int usage() fprintf(stderr, " fixmate fix mate information\n"); fprintf(stderr, " rmdup remove PCR duplicates\n"); fprintf(stderr, " glfview print GLFv2 file\n"); + fprintf(stderr, " flagstat simple stats\n"); fprintf(stderr, "\n"); return 1; } @@ -111,6 +113,7 @@ int main(int argc, char *argv[]) else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1); else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1); else if (strcmp(argv[1], "glfview") == 0) return glf_view_main(argc-1, argv+1); + else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1); #ifndef _NO_CURSES else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1); #endif -- 2.39.2