]> git.donarmstrong.com Git - samtools.git/commitdiff
* samtools-0.1.2-1
authorHeng Li <lh3@live.co.uk>
Thu, 29 Jan 2009 12:33:23 +0000 (12:33 +0000)
committerHeng Li <lh3@live.co.uk>
Thu, 29 Jan 2009 12:33:23 +0000 (12:33 +0000)
 * added flagstat command

Makefile
bam_stat.c [new file with mode: 0644]
bamtk.c

index aff7eaa0b2991157d336ce748638060cf26f5882..69ed702331f825cd17c6f8530de2a232d1abcf26 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -5,7 +5,7 @@ CXXFLAGS=       $(CFLAGS)
 DFLAGS=                -D_IOLIB=2 -D_FILE_OFFSET_BITS=64 #-D_NO_RAZF #-D_NO_CURSES
 OBJS=          bam.o bam_import.o bam_pileup.o bam_lpileup.o bam_sort.o bam_index.o \
                        razf.o bgzf.o faidx.o bam_tview.o bam_maqcns.o bam_aux.o bam_plcmd.o \
-                       bam_mate.o bam_rmdup.o glf.o
+                       bam_mate.o bam_rmdup.o glf.o bam_stat.o
 PROG=          razip bgzip samtools
 INCLUDES=      
 LIBS=          -lm -lz
diff --git a/bam_stat.c b/bam_stat.c
new file mode 100644 (file)
index 0000000..3f58686
--- /dev/null
@@ -0,0 +1,73 @@
+#include "bam.h"
+
+typedef struct {
+       long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good;
+       long long n_sgltn, n_read1, n_read2;
+       long long n_qcfail, n_dup;
+       long long n_diffchr, n_diffhigh;
+} bam_flagstat_t;
+
+#define flagstat_loop(s, c) do {                                                                               \
+               ++(s)->n_reads;                                                                                                 \
+               if ((c)->flag & BAM_FPAIRED) {                                                                  \
+                       ++(s)->n_pair_all;                                                                                      \
+                       if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good;           \
+                       if ((c)->flag & BAM_FREAD1) ++(s)->n_read1;                                     \
+                       if ((c)->flag & BAM_FREAD2) ++(s)->n_read2;                                     \
+                       if ((c)->flag & BAM_FMUNMAP) ++(s)->n_sgltn;                            \
+                       if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
+                               ++(s)->n_pair_map;                                                                              \
+                               if ((c)->mtid != (c)->tid) {                                                    \
+                                       ++(s)->n_diffchr;                                                                       \
+                                       if ((c)->qual >= 5) ++(s)->n_diffhigh;                          \
+                               }                                                                                                               \
+                       }                                                                                                                       \
+               }                                                                                                                               \
+               if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped;                                 \
+               if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail;                                   \
+               if ((c)->flag & BAM_FDUP) ++(s)->n_dup;                                                 \
+       } while (0)
+
+bam_flagstat_t *bam_flagstat_core(bamFile fp)
+{
+       bam_flagstat_t *s;
+       bam1_t *b;
+       bam1_core_t *c;
+       s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
+       b = bam_init1();
+       c = &b->core;
+       while (bam_read1(fp, b) >= 0) {
+               flagstat_loop(s, c);
+       }
+       bam_destroy1(b);
+       return s;
+}
+int bam_flagstat(int argc, char *argv[])
+{
+       bamFile fp;
+       bam_header_t *header;
+       bam_flagstat_t *s;
+       if (argc == optind) {
+               fprintf(stderr, "Usage: samtools flagstat <in.bam>\n");
+               return 1;
+       }
+       fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
+       assert(fp);
+       header = bam_header_read(fp);
+       s = bam_flagstat_core(fp);
+       printf("%lld in total\n", s->n_reads);
+       printf("%lld QC failure\n", s->n_qcfail);
+       printf("%lld duplicates\n", s->n_dup);
+       printf("%lld mapped\n", s->n_mapped);
+       printf("%lld paired in sequencing\n", s->n_pair_all);
+       printf("%lld read1\n", s->n_read1);
+       printf("%lld read2\n", s->n_read2);
+       printf("%lld properly paired\n", s->n_pair_good);
+       printf("%lld with both itself and its mate mapped\n", s->n_pair_map);
+       printf("%lld singletons\n", s->n_sgltn);
+       printf("%lld with its mate mapped to a different chr\n", s->n_diffchr);
+       free(s);
+       bam_header_destroy(header);
+       bam_close(fp);
+       return 0;
+}
diff --git a/bamtk.c b/bamtk.c
index e162f076addfaaee2b653c7e2ca88ef387ce1e96..0e7301649e3f910d5b2d7b70b4da81c2a0d81691 100644 (file)
--- a/bamtk.c
+++ b/bamtk.c
@@ -3,7 +3,7 @@
 #include "bam.h"
 
 #ifndef PACKAGE_VERSION
-#define PACKAGE_VERSION "0.1.2"
+#define PACKAGE_VERSION "0.1.2-1"
 #endif
 
 int bam_taf2baf(int argc, char *argv[]);
@@ -14,6 +14,7 @@ int bam_sort(int argc, char *argv[]);
 int bam_tview_main(int argc, char *argv[]);
 int bam_mating(int argc, char *argv[]);
 int bam_rmdup(int argc, char *argv[]);
+int bam_flagstat(int argc, char *argv[]);
 
 int faidx_main(int argc, char *argv[]);
 int glf_view_main(int argc, char *argv[]);
@@ -94,6 +95,7 @@ static int usage()
        fprintf(stderr, "         fixmate     fix mate information\n");
        fprintf(stderr, "         rmdup       remove PCR duplicates\n");
        fprintf(stderr, "         glfview     print GLFv2 file\n");
+       fprintf(stderr, "         flagstat    simple stats\n");
        fprintf(stderr, "\n");
        return 1;
 }
@@ -111,6 +113,7 @@ int main(int argc, char *argv[])
        else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);
        else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
        else if (strcmp(argv[1], "glfview") == 0) return glf_view_main(argc-1, argv+1);
+       else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
 #ifndef _NO_CURSES
        else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);
 #endif