1#!/usr/local/bin/perl 2# 3# Copyright (C) 2010 Broad Institute. 4# Copyright (C) 2011, 2014, 2017 Genome Research Ltd. 5# 6# Author: Heng Li <lh3@sanger.ac.uk> 7# 8# Permission is hereby granted, free of charge, to any person obtaining a copy 9# of this software and associated documentation files (the "Software"), to deal 10# in the Software without restriction, including without limitation the rights 11# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12# copies of the Software, and to permit persons to whom the Software is 13# furnished to do so, subject to the following conditions: 14# 15# The above copyright notice and this permission notice shall be included in 16# all copies or substantial portions of the Software. 17# 18# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24# DEALINGS IN THE SOFTWARE. 25 26use strict; 27use warnings; 28use Getopt::Std; 29 30&main; 31exit; 32 33sub main { 34 &usage if (@ARGV < 1); 35 my $command = shift(@ARGV); 36 my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter, 37 hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats, 38 gapstats=>\&gapstats, splitchr=>\&splitchr, vcf2fq=>\&vcf2fq); 39 die("Unknown command \"$command\".\n") if (!defined($func{$command})); 40 &{$func{$command}}; 41} 42 43sub splitchr { 44 my %opts = (l=>5000000); 45 getopts('l:', \%opts); 46 my $l = $opts{l}; 47 die(qq/Usage: vcfutils.pl splitchr [-l $opts{l}] <in.fa.fai>\n/) if (@ARGV == 0 && -t STDIN); 48 while (<>) { 49 my @t = split; 50 my $last = 0; 51 for (my $i = 0; $i < $t[1];) { 52 my $e = ($t[1] - $i) / $l < 1.1? $t[1] : $i + $l; 53 print "$t[0]:".($i+1)."-$e\n"; 54 $i = $e; 55 } 56 } 57} 58 59sub subsam { 60 die(qq/Usage: vcfutils.pl subsam <in.vcf> [samples]\n/) if (@ARGV == 0); 61 my ($fh, %h); 62 my $fn = shift(@ARGV); 63 my @col; 64 open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die; 65 $h{$_} = 1 for (@ARGV); 66 while (<$fh>) { 67 if (/^##/) { 68 print; 69 } elsif (/^#/) { 70 my @t = split; 71 my @s = @t[0..8]; # all fixed fields + FORMAT 72 for (9 .. $#t) { 73 if ($h{$t[$_]}) { 74 push(@s, $t[$_]); 75 push(@col, $_); 76 } 77 } 78 pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field 79 print join("\t", @s), "\n"; 80 } else { 81 my @t = split; 82 if (@col == 0) { 83 print join("\t", @t[0..7]), "\n"; 84 } else { 85 print join("\t", @t[0..8], map {$t[$_]} @col), "\n"; 86 } 87 } 88 } 89 close($fh); 90} 91 92sub listsam { 93 die(qq/Usage: vcfutils.pl listsam <in.vcf>\n/) if (@ARGV == 0 && -t STDIN); 94 while (<>) { 95 if (/^#/ && !/^##/) { 96 my @t = split; 97 print join("\n", @t[9..$#t]), "\n"; 98 exit; 99 } 100 } 101} 102 103sub fillac { 104 die(qq/Usage: vcfutils.pl fillac <in.vcf>\n\nNote: The GT field MUST BE present and always appear as the first field.\n/) if (@ARGV == 0 && -t STDIN); 105 while (<>) { 106 if (/^#/) { 107 print; 108 } else { 109 my @t = split; 110 my @c = (0, 0); 111 my $n = 0; 112 my $s = -1; 113 @_ = split(":", $t[8]); 114 for (0 .. $#_) { 115 if ($_[$_] eq 'GT') { $s = $_; last; } 116 } 117 if ($s < 0) { 118 print join("\t", @t), "\n"; 119 next; 120 } 121 for (9 .. $#t) { 122 if ($t[$_] =~ /^0,0,0/) { 123 } elsif ($t[$_] =~ /^([^\s:]+:){$s}(\d+).(\d+)/) { 124 ++$c[$2]; ++$c[$3]; 125 $n += 2; 126 } 127 } 128 my $AC = "AC=" . join("\t", @c[1..$#c]) . ";AN=$n"; 129 my $info = $t[7]; 130 $info =~ s/(;?)AC=(\d+)//; 131 $info =~ s/(;?)AN=(\d+)//; 132 if ($info eq '.') { 133 $info = $AC; 134 } else { 135 $info .= ";$AC"; 136 } 137 $t[7] = $info; 138 print join("\t", @t), "\n"; 139 } 140 } 141} 142 143sub ldstats { 144 my %opts = (t=>0.9); 145 getopts('t:', \%opts); 146 die("Usage: vcfutils.pl ldstats [-t $opts{t}] <in.vcf>\n") if (@ARGV == 0 && -t STDIN); 147 my $cutoff = $opts{t}; 148 my ($last, $lastchr) = (0x7fffffff, ''); 149 my ($x, $y, $n) = (0, 0, 0); 150 while (<>) { 151 if (/^([^#\s]+)\s(\d+)/) { 152 my ($chr, $pos) = ($1, $2); 153 if (/NEIR=([\d\.]+)/) { 154 ++$n; 155 ++$y, $x += $pos - $last if ($lastchr eq $chr && $pos > $last && $1 > $cutoff); 156 } 157 $last = $pos; $lastchr = $chr; 158 } 159 } 160 print "Number of SNP intervals in strong LD (r > $opts{t}): $y\n"; 161 print "Fraction: ", $y/$n, "\n"; 162 print "Length: $x\n"; 163} 164 165sub qstats { 166 my %opts = (r=>'', s=>0.02, v=>undef); 167 getopts('r:s:v', \%opts); 168 die("Usage: vcfutils.pl qstats [-r ref.vcf] <in.vcf>\n 169Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN); 170 my %ts = (AG=>1, GA=>1, CT=>1, TC=>1); 171 my %h = (); 172 my $is_vcf = defined($opts{v})? 1 : 0; 173 if ($opts{r}) { # read the reference positions 174 my $fh; 175 open($fh, $opts{r}) || die; 176 while (<$fh>) { 177 next if (/^#/); 178 if ($is_vcf) { 179 my @t = split; 180 $h{$t[0],$t[1]} = $t[4]; 181 } else { 182 $h{$1,$2} = 1 if (/^(\S+)\s+(\d+)/); 183 } 184 } 185 close($fh); 186 } 187 my $hsize = scalar(keys %h); 188 my @a; 189 while (<>) { 190 next if (/^#/); 191 my @t = split; 192 next if (length($t[3]) != 1 || uc($t[3]) eq 'N'); 193 $t[3] = uc($t[3]); $t[4] = uc($t[4]); 194 my @s = split(',', $t[4]); 195 $t[5] = 3 if ($t[5] eq '.' || $t[5] < 0); 196 next if (length($s[0]) != 1); 197 my $hit; 198 if ($is_vcf) { 199 $hit = 0; 200 my $aa = $h{$t[0],$t[1]}; 201 if (defined($aa)) { 202 my @aaa = split(",", $aa); 203 for (@aaa) { 204 $hit = 1 if ($_ eq $s[0]); 205 } 206 } 207 } else { 208 $hit = defined($h{$t[0],$t[1]})? 1 : 0; 209 } 210 push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $hit]); 211 } 212 push(@a, [-1, 0, 0, 0]); # end marker 213 die("[qstats] No SNP data!\n") if (@a == 0); 214 @a = sort {$b->[0]<=>$a->[0]} @a; 215 my $next = $opts{s}; 216 my $last = $a[0]; 217 my @c = (0, 0, 0, 0); 218 my @lc; 219 $lc[1] = $lc[2] = 0; 220 for my $p (@a) { 221 if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) { 222 my @x; 223 $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100); 224 $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0); 225 $x[2] = sprintf("%.4f", $c[3] / $c[1]); 226 my $a = $c[1] - $lc[1]; 227 my $b = $c[2] - $lc[2]; 228 $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100); 229 print join("\t", $last, @c, @x), "\n"; 230 $next = $c[0]/@a + $opts{s}; 231 $lc[1] = $c[1]; $lc[2] = $c[2]; 232 } 233 ++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3]; 234 $last = $p->[0]; 235 } 236} 237 238sub varFilter { 239 my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4); 240 getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts); 241 die(qq/ 242Usage: vcfutils.pl varFilter [options] <in.vcf> 243 244Options: -Q INT minimum RMS mapping quality for SNPs [$opts{Q}] 245 -d INT minimum read depth [$opts{d}] 246 -D INT maximum read depth [$opts{D}] 247 -a INT minimum number of alternate bases [$opts{a}] 248 -w INT SNP within INT bp around a gap to be filtered [$opts{w}] 249 -W INT window size for filtering adjacent gaps [$opts{W}] 250 -1 FLOAT min P-value for strand bias (given PV4) [$opts{1}] 251 -2 FLOAT min P-value for baseQ bias [$opts{2}] 252 -3 FLOAT min P-value for mapQ bias [$opts{3}] 253 -4 FLOAT min P-value for end distance bias [$opts{4}] 254 -e FLOAT min P-value for HWE (plus F<0) [$opts{e}] 255 -p print filtered variants 256 257Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools. 258\n/) if (@ARGV == 0 && -t STDIN); 259 260 # calculate the window size 261 my ($ol, $ow) = ($opts{W}, $opts{w}); 262 my $max_dist = $ol > $ow? $ol : $ow; 263 # the core loop 264 my @staging; # (indel_filtering_score, flt_tag, indel_span; chr, pos, ...) 265 while (<>) { 266 my @t = split; 267 if (/^#/) { 268 print; next; 269 } 270 next if ($t[4] eq '.'); # skip non-var sites 271 next if ($t[3] eq 'N'); # skip sites with unknown ref ('N') 272 # check if the site is a SNP 273 my $type = 1; # SNP 274 if (length($t[3]) > 1) { 275 $type = 2; # MNP 276 my @s = split(',', $t[4]); 277 for (@s) { 278 $type = 3 if (length != length($t[3])); 279 } 280 } else { 281 my @s = split(',', $t[4]); 282 for (@s) { 283 $type = 3 if (length > 1); 284 } 285 } 286 # clear the out-of-range elements 287 while (@staging) { 288 # Still on the same chromosome and the first element's window still affects this position? 289 last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]); 290 varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much 291 } 292 my $flt = 0; 293 # parse annotations 294 my ($dp, $mq, $dp_alt) = (-1, -1, -1); 295 if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) { 296 $dp = $1 + $2 + $3 + $4; 297 $dp_alt = $3 + $4; 298 } 299 if ($t[7] =~ /DP=(\d+)/i) { 300 $dp = $1; 301 } 302 $mq = $1 if ($t[7] =~ /MQ=(\d+)/i); 303 # the depth and mapQ filter 304 if ($dp >= 0) { 305 if ($dp < $opts{d}) { 306 $flt = 2; 307 } elsif ($dp > $opts{D}) { 308 $flt = 3; 309 } 310 } 311 $flt = 4 if ($dp_alt >= 0 && $dp_alt < $opts{a}); 312 $flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q}); 313 $flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/ 314 && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4})); 315 $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S}))); 316 # HWE filter 317 if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) { 318 my $p = 2*$1 + $2; 319 my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0; 320 $flt = 9 if ($f < 0); 321 } 322 323 my $score = $t[5] * 100 + $dp_alt; 324 my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs 325 if ($flt == 0) { 326 if ($type == 3) { # an indel 327 # filtering SNPs and MNPs 328 for my $x (@staging) { 329 next if (($x->[0]&3) == 3 || $x->[1] || $x->[4] + $x->[2] + $ow < $t[1]); 330 $x->[1] = 5; 331 } 332 # check the staging list for indel filtering 333 for my $x (@staging) { 334 next if (($x->[0]&3) != 3 || $x->[1] || $x->[4] + $x->[2] + $ol < $t[1]); 335 if ($x->[0]>>2 < $score) { 336 $x->[1] = 6; 337 } else { 338 $flt = 6; last; 339 } 340 } 341 } else { # SNP or MNP 342 for my $x (@staging) { 343 next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]); 344 if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1) 345 && length($x->[7]) - length($x->[6]) == 1) { 346 $x->[1] = 5; 347 } else { $flt = 5; } 348 last; 349 } 350 # check MNP 351 for my $x (@staging) { 352 next if (($x->[0]&3) == 3 || $x->[4] + $x->[2] < $t[1]); 353 if ($x->[0]>>2 < $score) { 354 $x->[1] = 8; 355 } else { 356 $flt = 8; last; 357 } 358 } 359 } 360 } 361 push(@staging, [$score<<2|$type, $flt, $rlen, @t]); 362 } 363 # output the last few elements in the staging list 364 while (@staging) { 365 varFilter_aux(shift @staging, $opts{p}); 366 } 367} 368 369sub varFilter_aux { 370 my ($first, $is_print) = @_; 371 if ($first->[1] == 0) { 372 print join("\t", @$first[3 .. @$first-1]), "\n"; 373 } elsif ($is_print) { 374 print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n"; 375 } 376} 377 378sub gapstats { 379 my (@c0, @c1); 380 $c0[$_] = $c1[$_] = 0 for (0 .. 10000); 381 while (<>) { 382 next if (/^#/); 383 my @t = split; 384 next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel 385 my @s = split(',', $t[4]); 386 for my $x (@s) { 387 my $l = length($x) - length($t[3]) + 5000; 388 if ($x =~ /^-/) { 389 $l = -(length($x) - 1) + 5000; 390 } elsif ($x =~ /^\+/) { 391 $l = length($x) - 1 + 5000; 392 } 393 $c0[$l] += 1 / @s; 394 } 395 } 396 for (my $i = 0; $i < 10000; ++$i) { 397 next if ($c0[$i] == 0); 398 $c1[0] += $c0[$i]; 399 $c1[1] += $c0[$i] if (($i-5000)%3 == 0); 400 printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]); 401 } 402 printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]); 403} 404 405sub ucscsnp2vcf { 406 die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN); 407 print "##fileformat=VCFv4.0\n"; 408 print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"), "\n"; 409 while (<>) { 410 my @t = split("\t"); 411 my $indel = ($t[9] =~ /^[ACGT](\/[ACGT])+$/)? 0 : 1; 412 my $pos = $t[2] + 1; 413 my @alt; 414 push(@alt, $t[7]); 415 if ($t[6] eq '-') { 416 $t[9] = reverse($t[9]); 417 $t[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/; 418 } 419 my @a = split("/", $t[9]); 420 for (@a) { 421 push(@alt, $_) if ($_ ne $alt[0]); 422 } 423 if ($indel) { 424 --$pos; 425 for (0 .. $#alt) { 426 $alt[$_] =~ tr/-//d; 427 $alt[$_] = "N$alt[$_]"; 428 } 429 } 430 my $ref = shift(@alt); 431 my $af = $t[13] > 0? ";AF=$t[13]" : ''; 432 my $valid = ($t[12] eq 'unknown')? '' : ";valid=$t[12]"; 433 my $info = "molType=$t[10];class=$t[11]$valid$af"; 434 print join("\t", $t[1], $pos, $t[4], $ref, join(",", @alt), 0, '.', $info), "\n"; 435 } 436} 437 438sub hapmap2vcf { 439 die("Usage: vcfutils.pl <in.ucsc.snp> <in.hapmap>\n") if (@ARGV == 0); 440 my $fn = shift(@ARGV); 441 # parse UCSC SNP 442 warn("Parsing UCSC SNPs...\n"); 443 my ($fh, %map); 444 open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die; 445 while (<$fh>) { 446 my @t = split; 447 next if ($t[3] - $t[2] != 1); # not SNP 448 @{$map{$t[4]}} = @t[1,3,7]; 449 } 450 close($fh); 451 # write VCF 452 warn("Writing VCF...\n"); 453 print "##fileformat=VCFv4.0\n"; 454 while (<>) { 455 my @t = split; 456 if ($t[0] eq 'rs#') { # the first line 457 print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", @t[11..$#t]), "\n"; 458 } else { 459 next unless ($map{$t[0]}); 460 next if (length($t[1]) != 3); # skip non-SNPs 461 my $a = \@{$map{$t[0]}}; 462 my $ref = $a->[2]; 463 my @u = split('/', $t[1]); 464 if ($u[1] eq $ref) { 465 $u[1] = $u[0]; $u[0] = $ref; 466 } elsif ($u[0] ne $ref) { next; } 467 my $alt = $u[1]; 468 my %w; 469 $w{$u[0]} = 0; $w{$u[1]} = 1; 470 my @s = (@$a[0,1], $t[0], $ref, $alt, 0, '.', '.', 'GT'); 471 my $is_tri = 0; 472 for (@t[11..$#t]) { 473 if ($_ eq 'NN') { 474 push(@s, './.'); 475 } else { 476 my @a = ($w{substr($_,0,1)}, $w{substr($_,1,1)}); 477 if (!defined($a[0]) || !defined($a[1])) { 478 $is_tri = 1; 479 last; 480 } 481 push(@s, "$a[0]/$a[1]"); 482 } 483 } 484 next if ($is_tri); 485 print join("\t", @s), "\n"; 486 } 487 } 488} 489 490sub vcf2fq { 491 my %opts = (d=>3, D=>100000, Q=>10, l=>5); 492 getopts('d:D:Q:l:', \%opts); 493 die(qq/ 494Usage: vcfutils.pl vcf2fq [options] <all-site.vcf> 495 496Options: -d INT minimum depth [$opts{d}] 497 -D INT maximum depth [$opts{D}] 498 -Q INT min RMS mapQ [$opts{Q}] 499 -l INT INDEL filtering window [$opts{l}] 500\n/) if (@ARGV == 0 && -t STDIN); 501 502 my ($last_chr, $seq, $qual, $last_pos, @gaps); 503 my $_Q = $opts{Q}; 504 my $_d = $opts{d}; 505 my $_D = $opts{D}; 506 507 my %het = (AC=>'M', AG=>'R', AT=>'W', CA=>'M', CG=>'S', CT=>'Y', 508 GA=>'R', GC=>'S', GT=>'K', TA=>'W', TC=>'Y', TG=>'K'); 509 510 $last_chr = ''; 511 while (<>) { 512 next if (/^#/); 513 my @t = split; 514 if ($last_chr ne $t[0]) { 515 &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr); 516 ($last_chr, $last_pos) = ($t[0], 0); 517 $seq = $qual = ''; 518 @gaps = (); 519 } 520 die("[vcf2fq] unsorted input\n") if ($t[1] - $last_pos < 0); 521 if ($t[1] - $last_pos > 1) { 522 $seq .= 'n' x ($t[1] - $last_pos - 1); 523 $qual .= '!' x ($t[1] - $last_pos - 1); 524 } 525 if (length($t[3]) == 1 && $t[7] !~ /INDEL/ && $t[4] =~ /^([A-Za-z.])(,[A-Za-z])*$/) { # a SNP or reference 526 my ($ref, $alt) = ($t[3], $1); 527 my ($b, $q); 528 $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/); 529 if ($q < 0) { 530 $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0; 531 $b = ($_ < .5 || $alt eq '.')? $ref : $alt; 532 $q = -$q; 533 } else { 534 $b = $het{"$ref$alt"}; 535 $b ||= 'N'; 536 } 537 $b = lc($b); 538 $b = uc($b) if (($t[7] =~ /MQ=(\d+)/ && $1 >= $_Q) && ($t[7] =~ /DP=(\d+)/ && $1 >= $_d && $1 <= $_D)); 539 $q = int($q + 33 + .499); 540 $q = chr($q <= 126? $q : 126); 541 $seq .= $b; 542 $qual .= $q; 543 } elsif ($t[4] ne '.') { # an INDEL 544 push(@gaps, [$t[1], length($t[3])]); 545 } 546 $last_pos = $t[1]; 547 } 548 &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}); 549} 550 551sub v2q_post_process { 552 my ($chr, $seq, $qual, $gaps, $l) = @_; 553 for my $g (@$gaps) { 554 my $beg = $g->[0] > $l? $g->[0] - $l : 0; 555 my $end = $g->[0] + $g->[1] + $l; 556 $end = length($$seq) if ($end > length($$seq)); 557 substr($$seq, $beg, $end - $beg) = lc(substr($$seq, $beg, $end - $beg)); 558 } 559 print "\@$chr\n"; &v2q_print_str($seq); 560 print "+\n"; &v2q_print_str($qual); 561} 562 563sub v2q_print_str { 564 my ($s) = @_; 565 my $l = length($$s); 566 for (my $i = 0; $i < $l; $i += 60) { 567 print substr($$s, $i, 60), "\n"; 568 } 569} 570 571sub usage { 572 die(qq/ 573Usage: vcfutils.pl <command> [<arguments>]\n 574Command: subsam get a subset of samples 575 listsam list the samples 576 fillac fill the allele count field 577 qstats SNP stats stratified by QUAL 578 579 hapmap2vcf convert the hapmap format to VCF 580 ucscsnp2vcf convert UCSC SNP SQL dump to VCF 581 582 varFilter filtering short variants (*) 583 vcf2fq VCF->fastq (**) 584 585Notes: Commands with description endting with (*) may need bcftools 586 specific annotations. 587\n/); 588} 589