1#!/usr/bin/perl -w 2 3# Contact: lh3 4# Version: 0.1.1 5 6use strict; 7use warnings; 8use Getopt::Std; 9 10&soap2sam; 11exit; 12 13sub mating { 14 my ($s1, $s2) = @_; 15 my $isize = 0; 16 if ($s1->[2] ne '*' && $s1->[2] eq $s2->[2]) { # then calculate $isize 17 my $x1 = ($s1->[1] & 0x10)? $s1->[3] + length($s1->[9]) : $s1->[3]; 18 my $x2 = ($s2->[1] & 0x10)? $s2->[3] + length($s2->[9]) : $s2->[3]; 19 $isize = $x2 - $x1; 20 } 21 # update mate coordinate 22 if ($s2->[2] ne '*') { 23 @$s1[6..8] = (($s2->[2] eq $s1->[2])? "=" : $s2->[2], $s2->[3], $isize); 24 $s1->[1] |= 0x20 if ($s2->[1] & 0x10); 25 } else { 26 $s1->[1] |= 0x8; 27 } 28 if ($s1->[2] ne '*') { 29 @$s2[6..8] = (($s1->[2] eq $s2->[2])? "=" : $s1->[2], $s1->[3], -$isize); 30 $s2->[1] |= 0x20 if ($s1->[1] & 0x10); 31 } else { 32 $s2->[1] |= 0x8; 33 } 34} 35 36sub soap2sam { 37 my %opts = (); 38 getopts("p", \%opts); 39 die("Usage: soap2sam.pl [-p] <aln.soap>\n") if (@ARGV == 0 && -t STDIN); 40 my $is_paired = defined($opts{p}); 41 # core loop 42 my @s1 = (); 43 my @s2 = (); 44 my ($s_last, $s_curr) = (\@s1, \@s2); 45 while (<>) { 46 s/[\177-\377]|[\000-\010]|[\012-\040]//g; 47 next if (&soap2sam_aux($_, $s_curr, $is_paired) < 0); 48 if (@$s_last != 0 && $s_last->[0] eq $s_curr->[0]) { 49 &mating($s_last, $s_curr); 50 print join("\t", @$s_last), "\n"; 51 print join("\t", @$s_curr), "\n"; 52 @$s_last = (); @$s_curr = (); 53 } else { 54 print join("\t", @$s_last), "\n" if (@$s_last != 0); 55 my $s = $s_last; $s_last = $s_curr; $s_curr = $s; 56 } 57 } 58 print join("\t", @$s_last), "\n" if (@$s_last != 0); 59} 60 61sub soap2sam_aux { 62 my ($line, $s, $is_paired) = @_; 63 chomp($line); 64 my @t = split(/\s+/, $line); 65 return -1 if (@t < 9 || $line =~ /^\s/ || !$t[0]); 66 @$s = (); 67 # fix SOAP-2.1.x bugs 68 @t = @t[0..2,4..$#t] unless ($t[3] =~ /^\d+$/); 69 # read name 70 $s->[0] = $t[0]; 71 $s->[0] =~ s/\/[12]$//g; 72 # initial flag (will be updated later) 73 $s->[1] = 0; 74 $s->[1] |= 1 | 1<<($t[4] eq 'a'? 6 : 7); 75 $s->[1] |= 2 if ($is_paired); 76 # read & quality 77 $s->[9] = $t[1]; 78 $s->[10] = (length($t[2]) > length($t[1]))? substr($t[2], 0, length($t[1])) : $t[2]; 79 # cigar 80 $s->[5] = length($s->[9]) . "M"; 81 # coor 82 $s->[2] = $t[7]; $s->[3] = $t[8]; 83 $s->[1] |= 0x10 if ($t[6] eq '-'); 84 # mapQ 85 $s->[4] = $t[3] == 1? 30 : 0; 86 # mate coordinate 87 $s->[6] = '*'; $s->[7] = $s->[8] = 0; 88 # aux 89 push(@$s, "NM:i:$t[9]"); 90 my $md = ''; 91 if ($t[9]) { 92 my @x; 93 for (10 .. $#t) { 94 push(@x, sprintf("%.3d,$1", $2)) if ($t[$_] =~ /^([ACGT])->(\d+)/i); 95 } 96 @x = sort(@x); 97 my $a = 0; 98 for (@x) { 99 my ($y, $z) = split(","); 100 $md .= (int($y)-$a) . $z; 101 $a += $y - $a + 1; 102 } 103 $md .= length($t[1]) - $a; 104 } else { 105 $md = length($t[1]); 106 } 107 push(@$s, "MD:Z:$md"); 108 return 0; 109} 110