From 9bd8cd7dd42efa091a9361bb06a29d1bead405ce Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 30 Jul 2009 09:03:39 +0000 Subject: [PATCH] * samtools-0.1.5-16 (r421) * in view and pileup, load header from FASTA index if the input is SAM. --- bam_import.c | 5 +++-- bam_plcmd.c | 4 +++- bamtk.c | 2 +- misc/samtools.pl | 1 + sam.c | 21 +++++++++++++++++++++ sam.h | 5 ++++- sam_view.c | 19 +++++++++++++++---- 7 files changed, 48 insertions(+), 9 deletions(-) diff --git a/bam_import.c b/bam_import.c index a49f9e8..d004de3 100644 --- a/bam_import.c +++ b/bam_import.c @@ -118,9 +118,10 @@ bam_header_t *sam_header_read2(const char *fn) kstring_t *str; kh_ref_t *hash; khiter_t k; - hash = kh_init(ref); + if (fn == 0) return 0; fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r"); - assert(fp); + if (fp == 0) return 0; + hash = kh_init(ref); ks = ks_init(fp); str = (kstring_t*)calloc(1, sizeof(kstring_t)); while (ks_getuntil(ks, 0, str, &dret) > 0) { diff --git a/bam_plcmd.c b/bam_plcmd.c index 5d5506f..391767a 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -330,7 +330,7 @@ int bam_pileup(int argc, char *argv[]) fprintf(stderr, " -i only show lines/consensus with indels\n"); fprintf(stderr, " -m INT filtering reads with bits in INT [%d]\n", d->mask); fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", d->c->cap_mapQ); - fprintf(stderr, " -t FILE list of reference sequences (assume the input is in SAM)\n"); + fprintf(stderr, " -t FILE list of reference sequences (force -S)\n"); fprintf(stderr, " -l FILE list of sites at which pileup is output\n"); fprintf(stderr, " -f FILE reference sequence in the FASTA format\n\n"); fprintf(stderr, " -c output the maq consensus sequence\n"); @@ -356,6 +356,8 @@ int bam_pileup(int argc, char *argv[]) } if (d->fai == 0 && (d->format & (BAM_PLF_CNS|BAM_PLF_INDEL_ONLY))) fprintf(stderr, "[bam_pileup] indels will not be called when -f is absent.\n"); + if (fn_fa && is_SAM && fn_list == 0) fn_list = samfaipath(fn_fa); + { samfile_t *fp; fp = is_SAM? samopen(argv[optind], "r", fn_list) : samopen(argv[optind], "rb", 0); diff --git a/bamtk.c b/bamtk.c index daae6ab..234f465 100644 --- a/bamtk.c +++ b/bamtk.c @@ -4,7 +4,7 @@ #include "bam.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.5-15 (r419)" +#define PACKAGE_VERSION "0.1.5-16 (r421)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/misc/samtools.pl b/misc/samtools.pl index 0b0e9d2..06c24dc 100755 --- a/misc/samtools.pl +++ b/misc/samtools.pl @@ -229,6 +229,7 @@ sub unique { print $_ if (/^\@/); $score = $1 if (/AS:i:(\d+)/); my @t = split("\t"); + next if (@t < 11); if ($score < 0) { # AS tag is unavailable my $cigar = $t[5]; my ($mm, $go, $ge) = (0, 0, 0); diff --git a/sam.c b/sam.c index e8e742c..b8e7350 100644 --- a/sam.c +++ b/sam.c @@ -1,4 +1,6 @@ #include +#include +#include "faidx.h" #include "sam.h" #define TYPE_BAM 1 @@ -152,3 +154,22 @@ int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data) bam_destroy1(b); return 0; } + +char *samfaipath(const char *fn_ref) +{ + char *fn_list = 0; + fn_list = calloc(strlen(fn_ref) + 5, 1); + strcat(strcpy(fn_list, fn_ref), ".fai"); + if (access(fn_list, R_OK) == -1) { // fn_list is unreadable + if (access(fn_ref, R_OK) == -1) { + fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); + } else { + fprintf(stderr, "[samfaipath] build FASTA index...\n"); + if (fai_build(fn_ref) == -1) { + fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); + free(fn_list); fn_list = 0; + } + } + } + return fn_list; +} diff --git a/sam.h b/sam.h index f06439b..0b87194 100644 --- a/sam.h +++ b/sam.h @@ -51,7 +51,8 @@ extern "C" { @param aux auxiliary data; if mode[0]=='w', aux points to bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM are absent, aux points the file name of the list of the reference; - aux is not used otherwise. + aux is not used otherwise. If @SQ header lines are present in SAM, + aux is not used, either. @return SAM/BAM file handler */ @@ -88,6 +89,8 @@ extern "C" { */ int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); + char *samfaipath(const char *fn_ref); + #ifdef __cplusplus } #endif diff --git a/sam_view.c b/sam_view.c index 685784d..1776b8e 100644 --- a/sam_view.c +++ b/sam_view.c @@ -3,6 +3,7 @@ #include #include #include "sam.h" +#include "faidx.h" static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0; static char *g_library, *g_rg; @@ -38,11 +39,11 @@ int main_samview(int argc, char *argv[]) int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0; int of_type = BAM_OFDEC, is_long_help = 0; samfile_t *in = 0, *out = 0; - char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0; + char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0; /* parse command-line options */ strcpy(in_mode, "r"); strcpy(out_mode, "w"); - while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?")) >= 0) { + while ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?T:")) >= 0) { switch (c) { case 'S': is_bamin = 0; break; case 'b': is_bamout = 1; break; @@ -59,6 +60,7 @@ int main_samview(int argc, char *argv[]) case 'x': of_type = BAM_OFHEX; break; case 'X': of_type = BAM_OFSTR; break; case '?': is_long_help = 1; break; + case 'T': fn_ref = strdup(optarg); is_bamin = 0; break; default: return usage(is_long_help); } } @@ -74,11 +76,19 @@ int main_samview(int argc, char *argv[]) if (is_uncompressed) strcat(out_mode, "u"); if (argc == optind) return usage(is_long_help); + // generate the fn_list if necessary + if (fn_list && fn_ref) { + fprintf(stderr, "[main_samview] both -t and -T are applied. -T is ignored.\n"); + } else fn_list = samfaipath(fn_ref); // open file handlers if ((in = samopen(argv[optind], in_mode, fn_list)) == 0) { fprintf(stderr, "[main_samview] fail to open file for reading.\n"); goto view_end; } + if (in->header == 0) { + fprintf(stderr, "[main_samview] fail to read SAM header.\n"); + goto view_end; + } if ((out = samopen(fn_out? fn_out : "-", out_mode, in->header)) == 0) { fprintf(stderr, "[main_samview] fail to open file for writing.\n"); goto view_end; @@ -116,7 +126,7 @@ int main_samview(int argc, char *argv[]) view_end: // close files, free and return - free(fn_list); free(fn_out); free(g_library); free(g_rg); + free(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); samclose(in); samclose(out); return ret; @@ -134,6 +144,7 @@ static int usage(int is_long_help) fprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\n"); fprintf(stderr, " -X output FLAG in stirng (samtools-C specific)\n"); fprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\n"); + fprintf(stderr, " -T FILE reference sequence file (force -S) [null]\n"); fprintf(stderr, " -o FILE output file name [stdout]\n"); fprintf(stderr, " -f INT required flag, 0 for unset [0]\n"); fprintf(stderr, " -F INT filtering flag, 0 for unset [0]\n"); @@ -153,7 +164,7 @@ static int usage(int is_long_help) corresponding sequence length. The `.fai' file generated by `faidx'\n\ can be used here. This file may be empty if reads are unaligned.\n\ \n\ - 2. SAM->BAM conversion: `samtools view -bt ref.fa.fai in.sam.gz'.\n\ + 2. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n\ \n\ 3. BAM->SAM conversion: `samtools view in.bam'.\n\ \n\ -- 2.39.5