git.donarmstrong.com Git - samtools.git/blobdiff - misc/bamcheck.c
Merge pull request #44 from peterjc/check_bins
[samtools.git] / misc / bamcheck.c
index 5ab85b76495ec12f28900a5906f039141ecd6bac..b64c1690392bdf0bb1cca604c422f77df4fca9ca 100644 (file)
@@ -116,6 +116,7 @@ typedef struct
     uint64_t total_len_dup;
     uint64_t nreads_1st;
     uint64_t nreads_2nd;
+    uint64_t nreads_filtered;
     uint64_t nreads_dup;
     uint64_t nreads_unmapped;
     uint64_t nreads_unpaired;
@@ -619,9 +620,15 @@ void collect_stats(bam1_t *bam_line, stats_t *stats)
         if ( k == kh_end(stats->rg_hash) ) return;
     }
     if ( stats->flag_require && (bam_line->core.flag & stats->flag_require)!=stats->flag_require )
+    {
+        stats->nreads_filtered++;
         return;
+    }
     if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) )
+    {
+        stats->nreads_filtered++;
         return;
+    }
     if ( !is_in_regions(bam_line,stats) )
         return;
     if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen ) 
@@ -940,6 +947,8 @@ void output_stats(stats_t *stats)
         printf(" %s",stats->argv[i]);
     printf("\n");
     printf("# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n");
+    printf("SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd));
+    printf("SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered);
     printf("SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd));
     printf("SN\tis paired:\t%d\n", stats->nreads_1st&&stats->nreads_2nd ? 1 : 0);
     printf("SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0);
@@ -954,8 +963,8 @@ void output_stats(stats_t *stats)
     printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
     printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
     printf("SN\ttotal length:\t%ld\n", (long)stats->total_len);
-    printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped);
-    printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar);
+    printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped);                 // the length of the whole read goes here, including soft-clips etc.
+    printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar);   // only matched and inserted bases are counted here
     printf("SN\tbases trimmed:\t%ld\n", (long)stats->nbases_trimmed);
     printf("SN\tbases duplicated:\t%ld\n", (long)stats->total_len_dup);
     printf("SN\tmismatches:\t%ld\n", (long)stats->nmismatches);