From 2104bef5b18bf46562b9eca9c3d9a681ad6e8cf3 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 1 Dec 2009 14:37:17 +0000 Subject: [PATCH] * samtools-0.1.7-2 (r520) * fixed a few issues with compilation in Windows (on behalf of John) * choose a random base as the consensus (for population genetics studies) --- bam_plcmd.c | 17 ++++++++++++++--- bamtk.c | 2 +- misc/samtools.pl | 45 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 59 insertions(+), 5 deletions(-) diff --git a/bam_plcmd.c b/bam_plcmd.c index ba787a9..1191b58 100644 --- a/bam_plcmd.c +++ b/bam_plcmd.c @@ -18,6 +18,7 @@ KHASH_MAP_INIT_INT64(64, indel_list_t) #define BAM_PLF_GLF 0x08 #define BAM_PLF_VAR_ONLY 0x10 #define BAM_PLF_2ND 0x20 +#define BAM_PLF_RANBASE 0x40 typedef struct { bam_header_t *h; @@ -182,8 +183,15 @@ static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *p if (i == n) return 0; } // call the consensus and indel - if (d->format & BAM_PLF_CNS) // call consensus - cns = bam_maqcns_call(n, pu, d->c); + if (d->format & BAM_PLF_CNS) { // call consensus + if (d->format & BAM_PLF_RANBASE) { + const bam_pileup1_t *p = pu + (int)(drand48() * n); + int q = bam1_qual(p->b)[p->qpos]; + int mapQ = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ; + uint32_t b = bam1_seqi(bam1_seq(p->b), p->qpos); + cns = b<<28 | 0xf<<24 | mapQ<<16 | q<<8; + } else cns = bam_maqcns_call(n, pu, d->c); + } if ((d->format & (BAM_PLF_CNS|BAM_PLF_INDEL_ONLY)) && d->ref && pos < d->len) { // call indels if (proposed_indels) // the first element gives the size of the array r = bam_maqindel(n, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1); @@ -301,7 +309,7 @@ int bam_pileup(int argc, char *argv[]) d->tid = -1; d->mask = BAM_DEF_MASK; d->c = bam_maqcns_init(); d->ido = bam_maqindel_opt_init(); - while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:S2a")) >= 0) { + while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:S2aR")) >= 0) { switch (c) { case 'a': d->c->is_soap = 1; break; case 's': d->format |= BAM_PLF_SIMPLE; break; @@ -318,6 +326,7 @@ int bam_pileup(int argc, char *argv[]) case 'm': d->mask = strtol(optarg, 0, 0); break; case 'g': d->format |= BAM_PLF_GLF; break; case '2': d->format |= BAM_PLF_2ND; break; + case 'R': d->format |= BAM_PLF_RANBASE; break; case 'I': d->ido->q_indel = atoi(optarg); break; case 'G': d->ido->r_indel = atof(optarg); break; case 'S': is_SAM = 1; break; @@ -339,6 +348,7 @@ int bam_pileup(int argc, char *argv[]) fprintf(stderr, " -l FILE list of sites at which pileup is output\n"); fprintf(stderr, " -f FILE reference sequence in the FASTA format\n\n"); fprintf(stderr, " -c output the maq consensus sequence\n"); + fprintf(stderr, " -R randomly choose a random base as the consensus (force -c)\n"); fprintf(stderr, " -v print variants only (for -c)\n"); fprintf(stderr, " -g output in the GLFv3 format (suppressing -c/-i/-s)\n"); fprintf(stderr, " -T FLOAT theta in maq consensus calling model (for -c/-g) [%f]\n", d->c->theta); @@ -350,6 +360,7 @@ int bam_pileup(int argc, char *argv[]) free(fn_list); free(fn_fa); free(d); return 1; } + if (d->format & BAM_PLF_RANBASE) d->format |= BAM_PLF_CNS; if (fn_fa) d->fai = fai_load(fn_fa); if (d->format & (BAM_PLF_CNS|BAM_PLF_GLF)) bam_maqcns_prepare(d->c); // consensus calling if (d->format & BAM_PLF_GLF) { // for glf output diff --git a/bamtk.c b/bamtk.c index 2c7e4f0..aaa4a3e 100644 --- a/bamtk.c +++ b/bamtk.c @@ -9,7 +9,7 @@ #endif #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.1.7-1 (r515)" +#define PACKAGE_VERSION "0.1.7-2 (r520)" #endif int bam_taf2baf(int argc, char *argv[]); diff --git a/misc/samtools.pl b/misc/samtools.pl index 320e8aa..1d9cf59 100755 --- a/misc/samtools.pl +++ b/misc/samtools.pl @@ -11,7 +11,7 @@ my $version = '0.3.3'; my $command = shift(@ARGV); my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter, - unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr); + unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr, sam2fq=>\&sam2fq); die("Unknown command \"$command\".\n") if (!defined($func{$command})); &{$func{$command}}; @@ -226,6 +226,49 @@ sub p2q_print_str { } } +# +# sam2fq +# + +sub sam2fq { + my %opts = (n=>20, p=>''); + getopts('n:p:', \%opts); + die("Usage: samtools.pl sam2fq [-n 20] [-p ] \n") if (@ARGV == 0 && -t STDIN); + if ($opts{p} && $opts{n} > 1) { + my $pre = $opts{p}; + my @fh; + for (0 .. $opts{n}-1) { + open($fh[$_], sprintf("| gzip > $pre.%.3d.fq.gz", $_)) || die; + } + my $i = 0; + while (<>) { + next if (/^@/); + chomp; + my @t = split("\t"); + next if ($t[9] eq '*'); + my ($name, $seq, $qual); + if ($t[1] & 16) { # reverse strand + $seq = reverse($t[9]); + $qual = reverse($t[10]); + $seq =~ tr/ACGTacgt/TGCAtgca/; + } else { + ($seq, $qual) = @t[9,10]; + } + $name = $t[0]; + $name .= "/1" if ($t[1] & 0x40); + $name .= "/2" if ($t[1] & 0x80); + print {$fh[$i]} "\@$name\n$seq\n"; + if ($qual ne '*') { + print {$fh[$i]} "+\n$qual\n"; + } + $i = 0 if (++$i == $opts{n}); + } + close($fh[$_]) for (0 .. $opts{n}-1); + } else { + die("To be implemented.\n"); + } +} + # # sra2hdr # -- 2.39.2