6 #Modified by Zayed Albertyn(zayed.albertyn@gmail.com) & Colin Hercus(colin@novocraft.com)
19 if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize
20 my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3];
21 my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3];
24 # update mate coordinate
25 if ($s2->[2] ne '*') {
26 @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize);
27 $s1->[1] |= 0x20 if ($s2->[1] & 0x10);
31 if ($s1->[2] ne '*') {
32 @$s2[6..8] = (($s1->[2] eq $s2->[2])? "=" : $s1->[2], $s1->[3], -$isize);
33 $s2->[1] |= 0x20 if ($s1->[1] & 0x10);
42 die("Usage: novo2sam.pl [-p] <aln.novo>\n") if (@ARGV == 0);
43 my $is_paired = defined($opts{p});
47 my ($s_last, $s_curr) = (\@s1, \@s2);
50 next if (/(QC|NM)\s*$/ || /(R\s+\d+)\s*$/);
51 &novo2sam_aux($_, $s_curr, $is_paired);
52 if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) {
53 &mating($s_last, $s_curr);
54 print join("\t", @$s_last), "\n";
55 print join("\t", @$s_curr), "\n";
56 @$s_last = (); @$s_curr = ();
58 print join("\t", @$s_last), "\n" if (@$s_last != 0);
59 my $s = $s_last; $s_last = $s_curr; $s_curr = $s;
62 print join("\t", @$s_last), "\n" if (@$s_last != 0);
66 my ($line, $s, $is_paired) = @_;
69 my @t = split(/\s+/, $line);
70 my @variations = @t[13 .. $#t];
72 return if ($t[4] ne 'U');
73 my $len = length($t[2]);
75 $s->[0] = substr($t[0], 1);
76 $s->[0] =~ s/\/[12]$//g;
77 # initial flag (will be updated later)
79 $s->[1] |= 1 | 1<<($t[1] eq 'L'? 6 : 7);
80 $s->[1] |= 2 if ($t[10] eq '.');
83 $s->[9] = reverse($t[2]);
84 $s->[10] = reverse($t[3]);
85 $s->[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/;
87 $s->[9] = $t[2]; $s->[10] = $t[3];
91 if (scalar @variations ==0 ) {
92 $s->[5] = $len . "M"; # IMPORTANT: this cigar is not correct for gapped alignment
94 #convert to correct CIGAR
95 my $tmpstr = join" ",@variations ;
96 if ( $tmpstr=~ /\+|\-/ ) {
97 $cigarstring = cigar_method($line,\@variations,$len);
105 $s->[2] = substr($t[7], 1); $s->[3] = $t[8];
106 $s->[1] |= 0x10 if ($t[9] eq 'R');
108 $s->[4] = $t[5] > $t[6]? $t[5] : $t[6];
110 $s->[6] = '*'; $s->[7] = $s->[8] = 0;
112 push(@$s, "NM:i:".(@t-13));
114 $md = mdtag($md,$line,\@variations,$len);
115 push(@$s, "MD:Z:$md");
124 my @variations = @$ref;
131 foreach $string (@variations) {
132 my ($indeltype,$insert) = indeltype($string);
133 if ($indeltype eq "+") {
134 $len = length ($insert);
138 my $pos = $1 if $string =~ /^(\d+)/;
140 if ($len !=0 || ($deleteflag eq 1 && $indeltype eq ">")) {
145 if ($indeltype eq ">") {
151 if ($indeltype eq "-") {
152 my $deletedbase = $2 if $string =~ /(\d+)\-([A-Z]+)/;
153 if ($deleteflag == 0 ) {
156 $mdtag.=$deletedbase;
161 $len = $rdlen - $q + 1;
165 # print "In:$line\n";
166 # print "MD: OLD => NEW\nMD: $oldmd => $mdtag\n\n";
175 if ($string =~ /([A-Z]+)\>/) {
178 } elsif ($string =~ /\-/) {
180 } elsif ($string =~ /\+([A-Z]+)/) {
184 return ($indeltype,$insert);
193 my @variations = @$ref;
203 foreach $string (@variations) {
204 next if $string =~ />/;
205 my $pos = $1 if $string =~ /^(\d+)/;
207 if ($string =~ /\+([A-Z]+)/) {
210 }elsif ($string =~ /\-([A-Z]+)/) {
214 #print "$pos $indeltype $insert $t $q\n";
223 if ($indeltype eq "-") {
228 if ($indeltype eq "+") {
229 $len = length ($insert);
242 $len= $rdlen - $q + 1;
248 $cigar = newcigar($cigar,'D');
249 $cigar = newcigar($cigar,'I');
252 #print "c CIGAR:\t$cigar\n\n";
265 $copy =~ s/^($char+)/$1;/g;
267 $copy =~ s/([^0-9$char])($char+)/$1;$2;/g;
269 my @parts = split(/;/,$copy);
271 foreach $el (@parts) {
273 if ($el =~ /^$char+$/) {
274 $new.=length($el).$char;