X-Git-Url: https://git.donarmstrong.com/?p=samtools.git;a=blobdiff_plain;f=bam_plcmd.c;h=9c4f273dcbfe2c3576e9c6ea0e2d7661839bbb85;hp=51dece53796f23563c2943e77c8482f70ddef1d5;hb=60e0a8467ddbd0b89f15d201dcfe10c8796552b2;hpb=e08e3faf4cc66727674ecacb6914e69b9a5df8c9 diff --git a/bam_plcmd.c b/bam_plcmd.c index 51dece5..9c4f273 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -4,6 +4,8 @@ #include #include #include +#include +#include #include "sam.h" #include "faidx.h" #include "kstring.h" @@ -80,6 +82,7 @@ int bed_overlap(const void *_h, const char *chr, int beg, int end); typedef struct { int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth, fmt_flag; + int rflag_require, rflag_filter; int openQ, extQ, tandemQ, min_support; // for indels double min_frac; // for indels char *reg, *pl_list; @@ -117,6 +120,8 @@ static int mplp_func(void *data, bam1_t *b) skip = 1; continue; } + if (ma->conf->rflag_require && !(ma->conf->rflag_require&b->core.flag)) { skip = 1; continue; } + if (ma->conf->rflag_filter && ma->conf->rflag_filter&b->core.flag) { skip = 1; continue; } if (ma->conf->bed) { // test overlap skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))); if (skip) continue; @@ -215,6 +220,10 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) } data[i]->conf = conf; h_tmp = bam_header_read(data[i]->fp); + if ( !h_tmp ) { + fprintf(stderr,"[%s] fail to read the header of %s\n", __func__, fn[i]); + exit(1); + } data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text); rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list); @@ -312,7 +321,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) ref16 = bam_nt16_table[_ref0]; for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i); - bcf_call_combine(gplp.n, bcr, ref16, &bc); + bcf_call_combine(gplp.n, bcr, bca, ref16, &bc); bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, 0, 0); bcf_write(bp, bh, b); bcf_destroy(b); @@ -320,7 +329,7 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) { for (i = 0; i < gplp.n; ++i) bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i); - if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) { + if (bcf_call_combine(gplp.n, bcr, bca, -1, &bc) >= 0) { b = calloc(1, sizeof(bcf1_t)); bcf_call2bcf(tid, pos, &bc, b, bcr, conf->fmt_flag, bca, ref); bcf_write(bp, bh, b); @@ -393,11 +402,15 @@ static int mpileup(mplp_conf_t *conf, int n, char **fn) } #define MAX_PATH_LEN 1024 -static int read_file_list(const char *file_list,int *n,char **argv[]) +int read_file_list(const char *file_list,int *n,char **argv[]) { char buf[MAX_PATH_LEN]; - int len, nfiles; - char **files; + int len, nfiles = 0; + char **files = NULL; + struct stat sb; + + *n = 0; + *argv = NULL; FILE *fh = fopen(file_list,"r"); if ( !fh ) @@ -406,28 +419,33 @@ static int read_file_list(const char *file_list,int *n,char **argv[]) return 1; } - // Speed is not an issue here, determine the number of files by reading the file twice - nfiles = 0; - while ( fgets(buf,MAX_PATH_LEN,fh) ) nfiles++; - - if ( fseek(fh, 0L, SEEK_SET) ) - { - fprintf(stderr,"%s: %s\n", file_list,strerror(errno)); - return 1; - } - files = calloc(nfiles,sizeof(char*)); nfiles = 0; while ( fgets(buf,MAX_PATH_LEN,fh) ) { + // allow empty lines and trailing spaces len = strlen(buf); while ( len>0 && isspace(buf[len-1]) ) len--; if ( !len ) continue; - files[nfiles] = malloc(sizeof(char)*(len+1)); - strncpy(files[nfiles],buf,len); - files[nfiles][len] = 0; + // check sanity of the file list + buf[len] = 0; + if (stat(buf, &sb) != 0) + { + // no such file, check if it is safe to print its name + int i, safe_to_print = 1; + for (i=0; i= 0) { + static struct option lopts[] = + { + {"rf",1,0,1}, // require flag + {"ff",1,0,2}, // filter flag + {0,0,0,0} + }; + while ((c = getopt_long(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:po:e:h:Im:F:EG:6OsV1:2:",lopts,NULL)) >= 0) { switch (c) { + case 1 : mplp.rflag_require = strtol(optarg,0,0); break; + case 2 : mplp.rflag_filter = strtol(optarg,0,0); break; case 'f': mplp.fai = fai_load(optarg); if (mplp.fai == 0) return 1; @@ -513,7 +539,7 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n"); fprintf(stderr, " -A count anomalous read pairs\n"); fprintf(stderr, " -B disable BAQ computation\n"); - fprintf(stderr, " -b FILE list of input BAM files [null]\n"); + fprintf(stderr, " -b FILE list of input BAM filenames, one per line [null]\n"); fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n"); fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth); fprintf(stderr, " -E recalculate extended BAQ on the fly thus ignoring existing BQs\n"); @@ -525,6 +551,8 @@ int bam_mpileup(int argc, char *argv[]) fprintf(stderr, " -R ignore RG tags\n"); fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq); fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ); + fprintf(stderr, " --rf INT required flags: skip reads with mask bits unset []\n"); + fprintf(stderr, " --ff INT filter flags: skip reads with mask bits set []\n"); fprintf(stderr, "\nOutput options:\n\n"); fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n"); fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n");