]> git.donarmstrong.com Git - samtools.git/blobdiff - bam_stat.c
* samtools-0.1.2-1
[samtools.git] / bam_stat.c
diff --git a/bam_stat.c b/bam_stat.c
new file mode 100644 (file)
index 0000000..3f58686
--- /dev/null
@@ -0,0 +1,73 @@
+#include "bam.h"
+
+typedef struct {
+       long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good;
+       long long n_sgltn, n_read1, n_read2;
+       long long n_qcfail, n_dup;
+       long long n_diffchr, n_diffhigh;
+} bam_flagstat_t;
+
+#define flagstat_loop(s, c) do {                                                                               \
+               ++(s)->n_reads;                                                                                                 \
+               if ((c)->flag & BAM_FPAIRED) {                                                                  \
+                       ++(s)->n_pair_all;                                                                                      \
+                       if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good;           \
+                       if ((c)->flag & BAM_FREAD1) ++(s)->n_read1;                                     \
+                       if ((c)->flag & BAM_FREAD2) ++(s)->n_read2;                                     \
+                       if ((c)->flag & BAM_FMUNMAP) ++(s)->n_sgltn;                            \
+                       if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
+                               ++(s)->n_pair_map;                                                                              \
+                               if ((c)->mtid != (c)->tid) {                                                    \
+                                       ++(s)->n_diffchr;                                                                       \
+                                       if ((c)->qual >= 5) ++(s)->n_diffhigh;                          \
+                               }                                                                                                               \
+                       }                                                                                                                       \
+               }                                                                                                                               \
+               if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped;                                 \
+               if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail;                                   \
+               if ((c)->flag & BAM_FDUP) ++(s)->n_dup;                                                 \
+       } while (0)
+
+bam_flagstat_t *bam_flagstat_core(bamFile fp)
+{
+       bam_flagstat_t *s;
+       bam1_t *b;
+       bam1_core_t *c;
+       s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
+       b = bam_init1();
+       c = &b->core;
+       while (bam_read1(fp, b) >= 0) {
+               flagstat_loop(s, c);
+       }
+       bam_destroy1(b);
+       return s;
+}
+int bam_flagstat(int argc, char *argv[])
+{
+       bamFile fp;
+       bam_header_t *header;
+       bam_flagstat_t *s;
+       if (argc == optind) {
+               fprintf(stderr, "Usage: samtools flagstat <in.bam>\n");
+               return 1;
+       }
+       fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
+       assert(fp);
+       header = bam_header_read(fp);
+       s = bam_flagstat_core(fp);
+       printf("%lld in total\n", s->n_reads);
+       printf("%lld QC failure\n", s->n_qcfail);
+       printf("%lld duplicates\n", s->n_dup);
+       printf("%lld mapped\n", s->n_mapped);
+       printf("%lld paired in sequencing\n", s->n_pair_all);
+       printf("%lld read1\n", s->n_read1);
+       printf("%lld read2\n", s->n_read2);
+       printf("%lld properly paired\n", s->n_pair_good);
+       printf("%lld with both itself and its mate mapped\n", s->n_pair_map);
+       printf("%lld singletons\n", s->n_sgltn);
+       printf("%lld with its mate mapped to a different chr\n", s->n_diffchr);
+       free(s);
+       bam_header_destroy(header);
+       bam_close(fp);
+       return 0;
+}