uint64_t total_len_dup;
uint64_t nreads_1st;
uint64_t nreads_2nd;
+ uint64_t nreads_filtered;
uint64_t nreads_dup;
uint64_t nreads_unmapped;
uint64_t nreads_unpaired;
uint64_t nreads_paired;
+ uint64_t nreads_anomalous;
uint64_t nreads_mq0;
uint64_t nbases_mapped;
uint64_t nbases_mapped_cigar;
uint64_t nbases_trimmed; // bwa trimmed bases
uint64_t nmismatches;
+ uint64_t nreads_QCfailed, nreads_secondary;
// GC-depth related data
uint32_t ngcd, igcd; // The maximum number of GC depth bins and index of the current bin
if ( k == kh_end(stats->rg_hash) ) return;
}
if ( stats->flag_require && (bam_line->core.flag & stats->flag_require)!=stats->flag_require )
+ {
+ stats->nreads_filtered++;
return;
+ }
if ( stats->flag_filter && (bam_line->core.flag & stats->flag_filter) )
+ {
+ stats->nreads_filtered++;
return;
-
+ }
if ( !is_in_regions(bam_line,stats) )
return;
+ if ( stats->filter_readlen!=-1 && bam_line->core.l_qseq!=stats->filter_readlen )
+ return;
+
+ if ( bam_line->core.flag & BAM_FQCFAIL ) stats->nreads_QCfailed++;
+ if ( bam_line->core.flag & BAM_FSECONDARY ) stats->nreads_secondary++;
int seq_len = bam_line->core.l_qseq;
if ( !seq_len ) return;
- if ( stats->filter_readlen!=-1 && seq_len!=stats->filter_readlen ) return;
+
if ( seq_len >= stats->nbases )
realloc_buffers(stats,seq_len);
if ( stats->max_len<seq_len )
count_indels(stats,bam_line);
- // The insert size is tricky, because for long inserts the libraries are
- // prepared differently and the pairs point in other direction. BWA does
- // not set the paired flag for them. Similar thing is true also for 454
- // reads. Therefore, do the insert size stats for all mapped reads.
- int32_t isize = bam_line->core.isize;
- if ( isize<0 ) isize = -isize;
- if ( IS_PAIRED(bam_line) )
+ if ( !IS_PAIRED(bam_line) )
+ stats->nreads_unpaired++;
+ else
{
stats->nreads_paired++;
- if ( isize >= stats->nisize )
- isize=stats->nisize-1;
- int pos_fst = bam_line->core.mpos - bam_line->core.pos;
- int is_fst = IS_READ1(bam_line) ? 1 : -1;
- int is_fwd = IS_REVERSE(bam_line) ? -1 : 1;
- int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+ if ( bam_line->core.tid!=bam_line->core.mtid )
+ stats->nreads_anomalous++;
- if ( is_fwd*is_mfwd>0 )
- stats->isize_other[isize]++;
- else if ( is_fst*pos_fst>0 )
- {
- if ( is_fst*is_fwd>0 )
- stats->isize_inward[isize]++;
- else
- stats->isize_outward[isize]++;
- }
- else if ( is_fst*pos_fst<0 )
+ // The insert size is tricky, because for long inserts the libraries are
+ // prepared differently and the pairs point in other direction. BWA does
+ // not set the paired flag for them. Similar thing is true also for 454
+ // reads. Mates mapped to different chromosomes have isize==0.
+ int32_t isize = bam_line->core.isize;
+ if ( isize<0 ) isize = -isize;
+ if ( isize >= stats->nisize )
+ isize = stats->nisize-1;
+ if ( isize>0 || bam_line->core.tid==bam_line->core.mtid )
{
- if ( is_fst*is_fwd>0 )
- stats->isize_outward[isize]++;
- else
- stats->isize_inward[isize]++;
+ int pos_fst = bam_line->core.mpos - bam_line->core.pos;
+ int is_fst = IS_READ1(bam_line) ? 1 : -1;
+ int is_fwd = IS_REVERSE(bam_line) ? -1 : 1;
+ int is_mfwd = IS_MATE_REVERSE(bam_line) ? -1 : 1;
+
+ if ( is_fwd*is_mfwd>0 )
+ stats->isize_other[isize]++;
+ else if ( is_fst*pos_fst>0 )
+ {
+ if ( is_fst*is_fwd>0 )
+ stats->isize_inward[isize]++;
+ else
+ stats->isize_outward[isize]++;
+ }
+ else if ( is_fst*pos_fst<0 )
+ {
+ if ( is_fst*is_fwd>0 )
+ stats->isize_outward[isize]++;
+ else
+ stats->isize_inward[isize]++;
+ }
}
}
- else
- stats->nreads_unpaired++;
// Number of mismatches
uint8_t *nm = bam_aux_get(bam_line,"NM");
printf(" %s",stats->argv[i]);
printf("\n");
printf("# Summary Numbers. Use `grep ^SN | cut -f 2-` to extract this part.\n");
+ printf("SN\traw total sequences:\t%ld\n", (long)(stats->nreads_filtered+stats->nreads_1st+stats->nreads_2nd));
+ printf("SN\tfiltered sequences:\t%ld\n", (long)stats->nreads_filtered);
printf("SN\tsequences:\t%ld\n", (long)(stats->nreads_1st+stats->nreads_2nd));
printf("SN\tis paired:\t%d\n", stats->nreads_1st&&stats->nreads_2nd ? 1 : 0);
printf("SN\tis sorted:\t%d\n", stats->is_sorted ? 1 : 0);
printf("SN\treads paired:\t%ld\n", (long)stats->nreads_paired);
printf("SN\treads duplicated:\t%ld\n", (long)stats->nreads_dup);
printf("SN\treads MQ0:\t%ld\n", (long)stats->nreads_mq0);
+ printf("SN\treads QC failed:\t%ld\n", (long)stats->nreads_QCfailed);
+ printf("SN\tnon-primary alignments:\t%ld\n", (long)stats->nreads_secondary);
printf("SN\ttotal length:\t%ld\n", (long)stats->total_len);
- printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped);
- printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar);
+ printf("SN\tbases mapped:\t%ld\n", (long)stats->nbases_mapped); // the length of the whole read goes here, including soft-clips etc.
+ printf("SN\tbases mapped (cigar):\t%ld\n", (long)stats->nbases_mapped_cigar); // only matched and inserted bases are counted here
printf("SN\tbases trimmed:\t%ld\n", (long)stats->nbases_trimmed);
printf("SN\tbases duplicated:\t%ld\n", (long)stats->total_len_dup);
printf("SN\tmismatches:\t%ld\n", (long)stats->nmismatches);
printf("SN\tinward oriented pairs:\t%ld\n", (long)nisize_inward);
printf("SN\toutward oriented pairs:\t%ld\n", (long)nisize_outward);
printf("SN\tpairs with other orientation:\t%ld\n", (long)nisize_other);
+ printf("SN\tpairs on different chromosomes:\t%ld\n", (long)stats->nreads_anomalous/2);
int ibase,iqual;
if ( stats->max_len<stats->nbases ) stats->max_len++;