#define BAM_PLF_GLF 0x08
#define BAM_PLF_VAR_ONLY 0x10
#define BAM_PLF_2ND 0x20
+#define BAM_PLF_RANBASE 0x40
typedef struct {
bam_header_t *h;
if (i == n) return 0;
}
// call the consensus and indel
- if (d->format & BAM_PLF_CNS) // call consensus
- cns = bam_maqcns_call(n, pu, d->c);
+ if (d->format & BAM_PLF_CNS) { // call consensus
+ if (d->format & BAM_PLF_RANBASE) {
+ const bam_pileup1_t *p = pu + (int)(drand48() * n);
+ int q = bam1_qual(p->b)[p->qpos];
+ int mapQ = p->b->core.qual < d->c->cap_mapQ? p->b->core.qual : d->c->cap_mapQ;
+ uint32_t b = bam1_seqi(bam1_seq(p->b), p->qpos);
+ cns = b<<28 | 0xf<<24 | mapQ<<16 | q<<8;
+ } else cns = bam_maqcns_call(n, pu, d->c);
+ }
if ((d->format & (BAM_PLF_CNS|BAM_PLF_INDEL_ONLY)) && d->ref && pos < d->len) { // call indels
if (proposed_indels) // the first element gives the size of the array
r = bam_maqindel(n, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);
d->tid = -1; d->mask = BAM_DEF_MASK;
d->c = bam_maqcns_init();
d->ido = bam_maqindel_opt_init();
- while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:S2a")) >= 0) {
+ while ((c = getopt(argc, argv, "st:f:cT:N:r:l:im:gI:G:vM:S2aR")) >= 0) {
switch (c) {
case 'a': d->c->is_soap = 1; break;
case 's': d->format |= BAM_PLF_SIMPLE; break;
case 'm': d->mask = strtol(optarg, 0, 0); break;
case 'g': d->format |= BAM_PLF_GLF; break;
case '2': d->format |= BAM_PLF_2ND; break;
+ case 'R': d->format |= BAM_PLF_RANBASE; break;
case 'I': d->ido->q_indel = atoi(optarg); break;
case 'G': d->ido->r_indel = atof(optarg); break;
case 'S': is_SAM = 1; break;
fprintf(stderr, " -l FILE list of sites at which pileup is output\n");
fprintf(stderr, " -f FILE reference sequence in the FASTA format\n\n");
fprintf(stderr, " -c output the maq consensus sequence\n");
+ fprintf(stderr, " -R randomly choose a random base as the consensus (force -c)\n");
fprintf(stderr, " -v print variants only (for -c)\n");
fprintf(stderr, " -g output in the GLFv3 format (suppressing -c/-i/-s)\n");
fprintf(stderr, " -T FLOAT theta in maq consensus calling model (for -c/-g) [%f]\n", d->c->theta);
free(fn_list); free(fn_fa); free(d);
return 1;
}
+ if (d->format & BAM_PLF_RANBASE) d->format |= BAM_PLF_CNS;
if (fn_fa) d->fai = fai_load(fn_fa);
if (d->format & (BAM_PLF_CNS|BAM_PLF_GLF)) bam_maqcns_prepare(d->c); // consensus calling
if (d->format & BAM_PLF_GLF) { // for glf output
my $command = shift(@ARGV);
my %func = (showALEN=>\&showALEN, pileup2fq=>\&pileup2fq, varFilter=>\&varFilter,
- unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr);
+ unique=>\&unique, uniqcmp=>\&uniqcmp, sra2hdr=>\&sra2hdr, sam2fq=>\&sam2fq);
die("Unknown command \"$command\".\n") if (!defined($func{$command}));
&{$func{$command}};
}
}
+#
+# sam2fq
+#
+
+# Convert SAM records, read from the files named in @ARGV or from STDIN, to
+# FASTQ, distributing the reads round-robin over -n gzip-compressed files
+# named "<prefix>.NNN.fq.gz".
+#
+# Options (parsed from @ARGV with getopts):
+#   -n INT  number of output files [20]
+#   -p STR  prefix of the output files; without a prefix, or with -n below 2,
+#           the command stops with "To be implemented."
+#
+# Header lines (starting with '@') and records whose SEQ is '*' are skipped.
+# Reads with FLAG bit 16 set are reverse-complemented and their qualities
+# reversed; "/1" or "/2" is appended to the read name for FLAG bits 0x40 and
+# 0x80. Records whose QUAL is '*' are written without the "+"/quality lines.
+#
+# Dies on a usage error, on a non-integer -n, and when a gzip pipe cannot be
+# started or does not finish cleanly.
+sub sam2fq {
+  my %opts = (n=>20, p=>'');
+  getopts('n:p:', \%opts);
+  die("Usage: samtools.pl sam2fq [-n 20] [-p <prefix>] <inp.sam>\n") if (@ARGV == 0 && -t STDIN);
+  # A fractional -n never equals the wrapped counter, so the writer would run
+  # past the last open handle; reject anything that is not a plain integer.
+  die("[sam2fq] -n expects a non-negative integer, got '$opts{n}'\n") if ($opts{n} !~ /^\d+$/);
+  # Compare against '' so that a prefix such as "0" is still accepted.
+  if ($opts{p} ne '' && $opts{n} > 1) {
+    my ($pre, $n) = ($opts{p}, $opts{n});
+    my @fh;
+    for my $k (0 .. $n-1) {
+      # The prefix is passed as a sprintf argument, not as part of the format,
+      # so a '%' in it is taken literally.
+      my $path = sprintf("%s.%.3d.fq.gz", $pre, $k);
+      # The redirection still needs a shell; single-quote the file name so
+      # that blanks and shell metacharacters in the prefix are harmless.
+      (my $quoted = $path) =~ s/'/'\\''/g;
+      open($fh[$k], '|-', "gzip > '$quoted'")
+        or die("[sam2fq] fail to start gzip for '$path': $!\n");
+    }
+    my $i = 0; # index of the handle that receives the next read
+    while (my $line = <>) {
+      next if ($line =~ /^@/);
+      chomp($line);
+      my @t = split("\t", $line);
+      if (@t < 11) { # SEQ and QUAL are columns 10 and 11
+        warn("[sam2fq] skip line $.: fewer than 11 fields\n");
+        next;
+      }
+      next if ($t[9] eq '*');
+      my ($seq, $qual) = @t[9,10];
+      if ($t[1] & 16) { # reverse strand
+        $seq = scalar reverse($seq);
+        $qual = scalar reverse($qual);
+        # complement, including the IUPAC ambiguity codes
+        $seq =~ tr/ACGTMRWSYKVHDBNacgtmrwsykvhdbn/TGCAKYWSRMBDHVNtgcakywsrmbdhvn/;
+      }
+      my $name = $t[0];
+      $name .= "/1" if ($t[1] & 0x40);
+      $name .= "/2" if ($t[1] & 0x80);
+      print {$fh[$i]} "\@$name\n$seq\n";
+      print {$fh[$i]} "+\n$qual\n" if ($qual ne '*');
+      $i = ($i + 1) % $n;
+    }
+    for my $k (0 .. $n-1) {
+      # Closing a pipe waits for gzip; a false return means a write error or
+      # a non-zero exit status, i.e. a truncated or missing output file.
+      close($fh[$k])
+        or die("[sam2fq] gzip for output file $k failed: " . ($! ? $! : "exit status $?") . "\n");
+    }
+  } else {
+    die("To be implemented.\n");
+  }
+}
+
#
# sra2hdr
#