]> git.donarmstrong.com Git - samtools.git/blobdiff - misc/bamcheck.c
Merge branch 'vsbuffalo-master' into develop
[samtools.git] / misc / bamcheck.c
index d9d54c5856e5ba6832486c81bf460dfdf19574d2..5ab85b76495ec12f28900a5906f039141ecd6bac 100644 (file)
@@ -120,11 +120,13 @@ typedef struct
     uint64_t nreads_unmapped;
     uint64_t nreads_unpaired;
     uint64_t nreads_paired;
+    uint64_t nreads_anomalous;
     uint64_t nreads_mq0;
     uint64_t nbases_mapped;
     uint64_t nbases_mapped_cigar;
     uint64_t nbases_trimmed;  // bwa trimmed bases
     uint64_t nmismatches;
+    uint64_t nreads_QCfailed, nreads_secondary;
 
     // GC-depth related data
     uint32_t ngcd, igcd;        // The maximum number of GC depth bins and index of the current bin
@@ -620,13 +622,17 @@ void collect_stats(bam1_t *bam_line, stats_t *stats)
         return;
     if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) )
         return;
-
     if ( !is_in_regions(bam_line,stats) )
         return;
+    if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen ) 
+        return;
+
+    if ( bam_line->core.flag & BAM_FQCFAIL ) stats->nreads_QCfailed++;
+    if ( bam_line->core.flag & BAM_FSECONDARY ) stats->nreads_secondary++;
 
     int seq_len = bam_line->core.l_qseq;
     if ( !seq_len ) return;
-    if ( stats->filter_readlen!=-1 && seq_len!=stats->filter_readlen ) return;
+
     if ( seq_len >= stats->nbases )
         realloc_buffers(stats,seq_len);
     if ( stats->max_len<seq_len )
@@ -702,42 +708,48 @@ void collect_stats(bam1_t *bam_line, stats_t *stats)
 
         count_indels(stats,bam_line);
 
-        // The insert size is tricky, because for long inserts the libraries are
-        // prepared differently and the pairs point in other direction. BWA does
-        // not set the paired flag for them.  Similar thing is true also for 454
-        // reads. Therefore, do the insert size stats for all mapped reads.
-        int32_t isize = bam_line->core.isize;
-        if ( isize<0 ) isize = -isize;
-        if ( IS_PAIRED(bam_line) )
+        if ( !IS_PAIRED(bam_line) )
+            stats->nreads_unpaired++;
+        else 
         {
             stats->nreads_paired++;
-            if ( isize >= stats->nisize )
-                isize=stats->nisize-1;
 
-            int pos_fst = bam_line->core.mpos - bam_line->core.pos;
-            int is_fst  = IS_READ1(bam_line) ? 1 : -1;
-            int is_fwd  = IS_REVERSE(bam_line) ? -1 : 1;
-            int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+            if ( bam_line->core.tid!=bam_line->core.mtid )
+                stats->nreads_anomalous++;
 
-            if ( is_fwd*is_mfwd>0 )
-                stats->isize_other[isize]++;
-            else if ( is_fst*pos_fst>0 )
-            {
-                if ( is_fst*is_fwd>0 )
-                    stats->isize_inward[isize]++;
-                else
-                    stats->isize_outward[isize]++;
-            }
-            else if ( is_fst*pos_fst<0 )
+            // The insert size is tricky, because for long inserts the libraries are
+            // prepared differently and the pairs point in other direction. BWA does
+            // not set the paired flag for them.  Similar thing is true also for 454
+            // reads. Mates mapped to different chromosomes have isize==0.
+            int32_t isize = bam_line->core.isize;
+            if ( isize<0 ) isize = -isize;
+            if ( isize >= stats->nisize )
+                isize = stats->nisize-1;
+            if ( isize>0 || bam_line->core.tid==bam_line->core.mtid )
             {
-                if ( is_fst*is_fwd>0 )
-                    stats->isize_outward[isize]++;
-                else
-                    stats->isize_inward[isize]++;
+                int pos_fst = bam_line->core.mpos - bam_line->core.pos;
+                int is_fst  = IS_READ1(bam_line) ? 1 : -1;
+                int is_fwd  = IS_REVERSE(bam_line) ? -1 : 1;
+                int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+
+                if ( is_fwd*is_mfwd>0 )
+                    stats->isize_other[isize]++;
+                else if ( is_fst*pos_fst>0 )
+                {
+                    if ( is_fst*is_fwd>0 )
+                        stats->isize_inward[isize]++;
+                    else
+                        stats->isize_outward[isize]++;
+                }
+                else if ( is_fst*pos_fst<0 )
+                {
+                    if ( is_fst*is_fwd>0 )
+                        stats->isize_outward[isize]++;
+                    else
+                        stats->isize_inward[isize]++;
+                }
             }
         }
-        else
-            stats->nreads_unpaired++;
 
         // Number of mismatches 
         uint8_t *nm = bam_aux_get(bam_line,"NM");
@@ -939,6 +951,8 @@ void output_stats(stats_t *stats)
     printf("SN\treads paired:\t%ld\n", (long)stats->nreads_paired);
     printf("SN\treads duplicated:\t%ld\n", (long)stats->nreads_dup);
     printf("SN\treads MQ0:\t%ld\n", (long)stats->nreads_mq0);
+    printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
+    printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
     printf("SN\ttotal length:\t%ld\n", (long)stats->total_len);
     printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped);
     printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar);
@@ -955,6 +969,7 @@ void output_stats(stats_t *stats)
     printf("SN\tinward oriented pairs:\t%ld\n", (long)nisize_inward);
     printf("SN\toutward oriented pairs:\t%ld\n", (long)nisize_outward);
     printf("SN\tpairs with other orientation:\t%ld\n", (long)nisize_other);
+    printf("SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2);
 
     int ibase,iqual;
     if ( stats->max_len<stats->nbases ) stats->max_len++;