#include <ctype.h>
#include <string.h>
-#define PACKAGE_VERSION "0.2.0"
+#define PACKAGE_VERSION "0.2.3"
const uint8_t nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
static double INDEL_FRAC = 0.1;
static double INDEL_EXTEND = 0.3;
static int IS_SOLID = 0;
+static int SHOW_MM_INFO = 1;
void maq_mut_diref(const seq_t *seq, int is_hap, mutseq_t *hap1, mutseq_t *hap2)
{
c[0] = nst_nt4_table[(int)seq->s[i]];
c[1] = hap1->s[i]; c[2] = hap2->s[i];
if (c[0] >= 4) continue;
- if ((c[1] & mutmsk) != NOCHANGE || (c[1] & mutmsk) != NOCHANGE) {
+ if ((c[1] & mutmsk) != NOCHANGE || (c[2] & mutmsk) != NOCHANGE) {
printf("%s\t%d\t", name, i+1);
if (c[1] == c[2]) { // hom
if ((c[1]&mutmsk) == SUBSTITUTE) { // substitution
tmp_seq[1] = (uint8_t*)calloc(l+2, 1);
size[0] = size_l; size[1] = size_r;
- Q = (int)(-10.0 * log(ERR_RATE) / log(10.0) + 0.499) + 33;
+ Q = (ERR_RATE == 0.0)? 'I' : (int)(-10.0 * log(ERR_RATE) / log(10.0) + 0.499) + 33;
tot_len = n_ref = 0;
while ((l = seq_read_fasta(fp_fa, &seq, name, 0)) >= 0) {
n_sub[0] = n_sub[1] = n_indel[0] = n_indel[1] = n_err[0] = n_err[1] = 0;
#define __gen_read(x, start, iter) do { \
- for (i = (start), k = 0, ext_coor[x] = -1; i >= 0 && i < seq.l && k < s[x]; iter) { \
+ for (i = (start), k = 0, ext_coor[x] = -10; i >= 0 && i < seq.l && k < s[x]; iter) { \
int c = target[i], mut_type = c & mutmsk; \
if (ext_coor[x] < 0) { \
+ if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) continue; \
ext_coor[x] = i; \
- if (mut_type != NOCHANGE && mut_type != SUBSTITUTE) { \
- ext_coor[x] = -1; \
- break; \
- } \
} \
if (mut_type == DELETE) ++n_indel[x]; \
else if (mut_type == NOCHANGE || mut_type == SUBSTITUTE) { \
} else { \
int n, ins; \
++n_indel[x]; \
+ tmp_seq[x][k++] = c & 0xf; \
for (n = mut_type>>12, ins = c>>4; n > 0 && k < s[x]; --n, ins >>= 2) \
tmp_seq[x][k++] = ins & 0x3; \
} \
} \
- if (k != s[x]) ext_coor[x] = -1; \
+ if (k != s[x]) ext_coor[x] = -10; \
} while (0)
if (!IS_SOLID) {
} else { // FF pair
__gen_read(0, pos, ++i);
__gen_read(1, pos + d - 1 - s[1], ++i);
+ ++ext_coor[0]; ++ext_coor[1];
}
// change to color sequence: (0,1,2,3) -> (A,C,G,T)
for (j = 0; j < 2; ++j) {
for (j = 0; j < 2; ++j) {
for (i = 0; i < s[j]; ++i) qstr[i] = Q;
qstr[i] = 0;
- fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1,
- n_err[j], n_sub[j], n_indel[j], (long long)ii, j==0? is_flip+1 : 2-is_flip);
+ if (SHOW_MM_INFO) {
+ fprintf(fpo[j], "@%s_%u_%u_%d:%d:%d_%d:%d:%d_%llx/%d\n", name, ext_coor[0]+1, ext_coor[1]+1,
+ n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1],
+ (long long)ii, j==0? is_flip+1 : 2-is_flip);
+ } else {
+ fprintf(fpo[j], "@%s_%u_%u_%llx/%d %d:%d:%d_%d:%d:%d\n", name, ext_coor[0]+1, ext_coor[1]+1,
+ (long long)ii, j==0? is_flip+1 : 2-is_flip,
+ n_err[0], n_sub[0], n_indel[0], n_err[1], n_sub[1], n_indel[1]);
+ }
for (i = 0; i < s[j]; ++i)
fputc("ACGTN"[(int)tmp_seq[j][i]], fpo[j]);
fprintf(fpo[j], "\n+\n%s\n", qstr);
fprintf(stderr, " -R FLOAT fraction of indels [%.2f]\n", INDEL_FRAC);
fprintf(stderr, " -X FLOAT probability an indel is extended [%.2f]\n", INDEL_EXTEND);
fprintf(stderr, " -c generate reads in color space (SOLiD reads)\n");
- fprintf(stderr, " -h haploid mode\n");
+ fprintf(stderr, " -C show mismatch info in comment rather than read name\n");
+ fprintf(stderr, " -h haplotype mode\n");
fprintf(stderr, "\n");
+ fprintf(stderr, "Note: For SOLiD reads, the first read is F3 and the second is R3.\n\n");
return 1;
}
N = 1000000; dist = 500; std_dev = 50;
size_l = size_r = 70;
- while ((c = getopt(argc, argv, "e:d:s:N:1:2:r:R:hX:c")) >= 0) {
+ while ((c = getopt(argc, argv, "e:d:s:N:1:2:r:R:hX:cC")) >= 0) {
switch (c) {
case 'd': dist = atoi(optarg); break;
case 's': std_dev = atoi(optarg); break;
case 'R': INDEL_FRAC = atof(optarg); break;
case 'X': INDEL_EXTEND = atof(optarg); break;
case 'c': IS_SOLID = 1; break;
+ case 'C': SHOW_MM_INFO = 0; break;
case 'h': is_hap = 1; break;
}
}