1package Bio::Graphics::Glyph::segments; 2 3use strict; 4use Bio::Location::Simple; 5 6use constant RAGGED_START_FUZZ => 25; # will show ragged ends of alignments 7 # up to this many bp. 8use constant DEBUG => 0; 9 10# These are just offsets into an array data structure 11use constant TARGET => 0; 12use constant SRC_START => 1; 13use constant SRC_END => 2; 14use constant TGT_START => 3; 15use constant TGT_END => 4; 16 17eval { require Bio::Graphics::Browser2::Realign; 1; } || eval { require Bio::Graphics::Browser::Realign; }; 18 19use base qw(Bio::Graphics::Glyph::segmented_keyglyph Bio::Graphics::Glyph::generic); 20 21my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',n=>'n', 22 G=>'C',A=>'T',T=>'A',C=>'G',N=>'N'); 23 24sub my_description { 25 return <<END; 26This glyph draws multipart genomic features as a series of rectangles connected 27by solid lines. If the feature is attached to a DNA sequence, then the glyph will 28draw the sequence when magnification is high enough to show individual base pairs. 29The glyph is also capable of drawing genomic alignments, and performing protein 30translations. 31END 32} 33sub my_options { 34 { 35 draw_target => [ 36 'boolean', 37 undef, 38 'If true, draw the dna residues of the TARGET (aligned) sequence when the', 39 'magnification level allows.', 40 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.' 41 ], 42 draw_protein_target => [ 43 'boolean', 44 undef, 45 'If true, draw the protein residues of the TARGET (aligned) sequence when the', 46 'magnification level allows.', 47 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 48 ragged_extra => [ 49 'boolean', 50 undef, 51 'When combined with -draw_target, draw extra bases beyond the end', 52 'of the alignment. The value is the maximum number of extra bases.', 53 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 54 show_mismatch => [ 55 'integer', 56 undef, 57 'When combined with -draw_target, highlights mismatched bases in', 58 'the mismatch color. A value of 0 or undef never shows mismatches.', 59 'A value of 1 shows mismatches at the base pair alignment level, but', 60 'not at magnifications too low to allow the DNA to be displayed.', 61 'Any other positive integer will show mismatches when the track is showing', 62 'a region less than or equal to the specified value.', 63 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 64 mismatch_color => [ 65 'color', 66 'lightgrey', 67 'The color to use for mismatched bases when displaying alignments.', 68 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 69 indel_color => [ 70 'color', 71 'lightgrey', 72 'The color to use for indels when displaying alignments.'], 73 insertion_color => [ 74 'color', 75 'green', 76 'The color to use for insertions when displaying alignments; overrides indel_color'], 77 deletion_color => [ 78 'color', 79 'red', 80 'The color to use for deletions when displaying alignments; overrides indel_color'], 81 mismatch_only => [ 82 'boolean', 83 undef, 84 'If true, only print mismatched bases when displaying alignments.'], 85 true_target => [ 86 'boolean', 87 undef, 88 'Show the target DNA in its native (plus strand) orientation, even', 89 'if the alignment is to the minus strand.', 90 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 91 split_on_cigar => [ 92 'boolean', 93 undef, 94 'If true, and if the feature contains a CIGAR string as the value of the Gap', 95 'tag, then split the feature into subparts based on the CIGAR.'], 96 realign => [ 97 'boolean', 98 undef, 99 'Attempt to realign sequences at high magnification to account', 100 'for indels.', 101 'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'], 102 } 103} 104 105sub mismatch_color { 106 my $self = shift; 107 my $c = $self->option('mismatch_color') || 'lightgrey'; 108 return $self->translate_color($c); 109} 110 111sub indel_color { 112 my $self = shift; 113 my $c = $self->option('indel_color'); 114 return $self->mismatch_color unless $c; 115 return $self->translate_color($c); 116} 117 118sub insertion_color { 119 my $self = shift; 120 my $c = $self->option('insertion_color'); 121 $c ||= $self->option('indel_color'); 122 $c ||= $self->my_options->{insertion_color}[1]; 123 return $self->translate_color($c); 124} 125 126sub deletion_color { 127 my $self = shift; 128 my $c = $self->option('deletion_color'); 129 $c ||= $self->option('indel_color'); 130 $c ||= $self->my_options->{deletion_color}[1]; 131 return $self->translate_color($c); 132} 133 134sub show_mismatch { 135 my $self = shift; 136 my $smm = $self->option('show_mismatch'); 137 $smm ||= 1 if $self->option('mismatch_only'); 138 return unless $smm; 139 return 1 if $smm == 1 && $self->dna_fits; 140 return 1 if $smm >= $self->panel->length; 141} 142 143sub mismatch_only { shift->option('mismatch_only') } 144 145 146sub pad_left { 147 my $self = shift; 148 return $self->SUPER::pad_left unless $self->level > 0; 149 my $ragged = $self->option('ragged_start') 150 ? RAGGED_START_FUZZ 151 : $self->option('ragged_extra'); 152 153 return $self->SUPER::pad_left 154 unless $self->draw_target && $ragged && $self->dna_fits; 155 my $extra = 0; 156 my $target = eval {$self->feature->hit} or return $self->SUPER::pad_left + $extra; 157 return $self->SUPER::pad_left + $extra unless $target->start<$target->end && $target->start < $ragged; 158 return ($target->start-1) * $self->scale + $extra; 159} 160 161sub pad_right { 162 my $self = shift; 163 return $self->SUPER::pad_right unless $self->level > 0; 164 my $ragged = $self->option('ragged_start') 165 ? RAGGED_START_FUZZ 166 : $self->option('ragged_extra'); 167 return $self->SUPER::pad_right 168 unless $self->draw_target && $ragged && $self->dna_fits; 169 my $target = eval {$self->feature->hit} or return $self->SUPER::pad_right; 170 return $self->SUPER::pad_right unless $target->end < $target->start && $target->start < $ragged; 171 return ($target->end-1) * $self->scale; 172} 173 174sub labelwidth { 175 my $self = shift; 176 return $self->SUPER::labelwidth unless $self->draw_target && $self->dna_fits && $self->label_position eq 'left'; 177 return $self->{labelwidth} ||= (length($self->label||'')+1) * $self->mono_font->width; 178} 179sub draw_target { 180 my $self = shift; 181 return if $self->option('draw_dna'); 182 return $self->option('draw_target'); 183} 184 185sub draw_protein_target { 186 my $self = shift; 187 return if $self->option('draw_protein'); 188 return $self->option('draw_protein_target'); 189 return $self->option('draw_target'); 190} 191 192sub height { 193 my $self = shift; 194 my $height = $self->SUPER::height; 195 return $height unless $self->draw_target || $self->draw_protein_target; 196 if ($self->draw_target) { 197 return $height unless $self->dna_fits; 198 } 199 if ($self->draw_protein_target) { 200 return $height unless $self->protein_fits; 201 } 202 my $fontheight = $self->mono_font->height; 203 return $fontheight if $fontheight > $height; 204} 205 206# group sets connector to 'solid' 207sub connector { 208 my $self = shift; 209 return $self->SUPER::connector(@_) if $self->all_callbacks; 210 return ($self->SUPER::connector(@_) || 'solid'); 211} 212 213# never allow our components to bump 214sub bump { 215 my $self = shift; 216 my $bump = $self->SUPER::bump(@_); 217 return $bump if $self->all_callbacks; 218 return $self->parts_overlap ? $bump : 0; 219} 220 221sub maxdepth { 222 my $self = shift; 223 my $md = $self->Bio::Graphics::Glyph::maxdepth; 224 return $md if defined $md; 225 return 1; 226} 227 228# this was willfully confusing 229#sub fontcolor { 230# my $self = shift; 231# return $self->SUPER::fontcolor unless $self->draw_target;# || $self->option('draw_dna'); 232# return $self->SUPER::fontcolor unless $self->dna_fits; 233# return $self->bgcolor; 234#} 235 236# Override _subfeat() method to make it appear that a top-level feature that 237# has no subfeatures appears as a feature that has a single subfeature. 238# Otherwise at high mags gaps will be drawn as components rather than 239# as connectors. Because of differing representations of split features 240# in Bio::DB::GFF::Feature and Bio::SeqFeature::Generic, there is 241# some breakage of encapsulation here. 242sub _subfeat { 243 my $self = shift; 244 my $feature = shift; 245 246 my @subfeat = $self->SUPER::_subfeat($feature); 247 248 if (!@subfeat && $self->option('split_on_cigar')) { 249 my $cigar = $self->_get_cigar($feature); 250 if ($cigar && @$cigar) { 251 return $self->_split_on_cigar($feature,$cigar); 252 } 253 } 254 255 return @subfeat if @subfeat; 256 if ($self->level == 0 && !@subfeat && !$self->feature_has_subparts) { 257 return $self->feature; 258 } else { 259 return; 260 } 261} 262 263sub _split_on_cigar { 264 my $self = shift; 265 my ($feature,$cigar) = @_; 266 267 my $source_start = $feature->start; 268 my $source_end = $feature->end; 269 my $ss = $feature->strand; 270 my $ts = $feature->hit->strand; 271 272 # Render -/- alignments on the + strand 273 if ($ss == -1 && $ts == -1) { 274 $ss = 1; 275 } 276 277 my $target_start = eval {$feature->hit->start} || return $feature; 278 279 my (@parts); 280 281 for my $event (@$cigar) { 282 my ($op,$count) = @$event; 283 if ($op eq 'I' || $op eq 'S' || $op eq 'H') { 284 $target_start += $count; 285 } 286 elsif ($op eq 'D' || $op eq 'N') { 287 $source_start += $count; 288 } 289 elsif ($op eq 'P') { 290 # Do NOTHING for pads. Irrelevant for pairwise 291 # alignments, since we cannot show the pad in 292 # the reference sequence 293 } else { # everything else is assumed to be a match -- revisit 294 push @parts,[$source_start,$source_start+$count-1, 295 $target_start,$target_start+$count-1]; 296 $source_start += $count; 297 $target_start += $count; 298 } 299 } 300 301 my $id = $feature->seq_id; 302 my $tid = $feature->hit->seq_id; 303 my @result = map { 304 my ($s1,$s2,$t1,$t2) = @$_; 305 my $s = Bio::Graphics::Feature->new(-seq_id=> $id, 306 -start => $s1, 307 -end => $s2, 308 -strand => $ss, 309 ); 310 my $h = Bio::Graphics::Feature->new(-seq_id=> $tid, 311 -start => $t1, 312 -end => $t2, 313 -strand => $ts, 314 ); 315 $s->add_hit($h); 316 $s; 317 } @parts; 318 return @result; 319} 320 321sub draw { 322 my $self = shift; 323 324 my $draw_target = $self->draw_target && $self->dna_fits && eval {$self->feature->hit->seq}; 325 326 $self->SUPER::draw(@_); 327 328 return if $self->feature_has_subparts; 329 return unless $draw_target; 330 331 my $drew_sequence; 332 $drew_sequence = $self->draw_multiple_alignment(@_); 333 334 my ($gd,$x,$y) = @_; 335 $y += $self->top + $self->pad_top if $drew_sequence; # something is wrong - this is a hack/workaround 336 my $connector = $self->connector; 337 $self->draw_connectors($gd,$x,$y) 338 if $connector && $connector ne 'none' && $self->level == 0; 339} 340 341sub draw_component { 342 my $self = shift; 343 my ($gd,$left,$top,$partno,$total_parts) = @_; 344 my ($x1,$y1,$x2,$y2) = $self->bounds($left,$top); 345 346 my $draw_target; 347 my $strand = $self->feature->strand; 348 349 if ($self->draw_target && $self->dna_fits) { 350 $draw_target++; 351 my $stranded = $self->stranded; 352 my $bgcolor = $self->bgcolor; 353 if ($stranded) { 354 $x1 -= 6 if $strand < 0 && $x1 >= $self->panel->left; 355 $x2 += 6 if $strand > 0 && $x2 <= $self->panel->right; 356 $self->filled_arrow($gd,$strand,$x1,$y1,$x2,$y2) 357 } else { 358 $self->filled_box($gd,$x1,$y1,$x2,$y2,$bgcolor,$bgcolor); 359 } 360 } else { 361 $self->SUPER::draw_component(@_); 362 } 363 364 return unless $self->show_mismatch; 365 my $mismatch_color = $self->mismatch_color; 366 my $feature = $self->feature; 367 my $start = $self->feature->start; 368 my $end = $feature->end; 369 my (@mismatch_positions,@del_positions,@in_positions); 370 371 if (my ($src,$matchstr,$tgt) = eval{$feature->padded_alignment}) { 372 my @src = split '',$src; 373 my @match = split '',$matchstr; 374 my @tgt = split '',$tgt; 375 my $pos = $start; 376 377 # skip over src padding (probably soft clipped) 378 while ($src[0] eq '-') { 379 shift @src; 380 shift @tgt; 381 } 382 while ($src[-1] eq '-') { 383 pop @src; 384 pop @tgt; 385 } 386 387 for (my $i=0;$i<@src;$i++) { 388 if ($src[$i] eq '-') { 389 push @in_positions,$pos; 390 } 391 elsif ($tgt[$i] eq '-') { 392 push @del_positions,$pos; 393 $pos++; 394 } elsif ($src[$i] ne $tgt[$i]) { 395 push @mismatch_positions,$pos; 396 $pos++; 397 } else { 398 $pos++; 399 } 400 } 401 } 402 403 else { 404 my $sdna = eval {$feature->dna}; 405 my $tdna = eval {$feature->target->dna}; # works with GFF files 406 407 return unless $sdna =~ /[gatc]/i; 408 return unless $tdna =~ /[gatc]/i; 409 410 my @src = split '',$sdna; 411 my @tgt = split '',$tdna; 412 for (my $i=0;$i<@src;$i++) { 413 next if $src[$i] eq $tgt[$i]; 414 warn "$src[$i] eq $tgt[$i], strand=$strand"; 415 my $pos = $strand >= 0 ? $i+$start : $end-$i; 416 push @mismatch_positions,$pos; 417 } 418 } 419 420 my $pixels_per_base = $self->scale; 421 my $panel_right = $self->panel->right; 422 423 for my $a ([\@mismatch_positions,$self->mismatch_color], 424 [\@del_positions,$self->deletion_color], 425 [\@in_positions,$self->insertion_color,0.5,0.5] 426 ) { 427 428 my $color = $a->[1]; 429 my $offset = $a->[2]||0; 430 my $width = $a->[3]||1; 431 my @pixel_positions = $self->map_no_trunc(@{$a->[0]}); 432 433 foreach (@pixel_positions) { 434 next if $_ < $x1; 435 next if $_ > $x2; 436 next if $_ >= $panel_right; 437 my $left = $_ - $pixels_per_base*$offset; 438 my $right = $left+($width*$pixels_per_base); 439 my $top = $y1+1; 440 my $bottom= $y2-1; 441 my $middle= ($y1+$y2)/2; 442 if ($self->stranded && $left <= $x1+$pixels_per_base-1 && $self->strand < 0) { 443 $self->filled_arrow($gd,$self->strand, 444 $draw_target ? ($left-4):$left+2, 445 $top, 446 $draw_target ? $right:$right+5,$bottom,$color,$color,1); 447 } elsif ($self->stranded && $right >= $x2-$pixels_per_base+1 && $self->strand > 0) { 448 $self->filled_arrow($gd,$self->strand, 449 $left,$top,$draw_target ? ($right+4): $right-2,$bottom,$color,$color,1); 450 } else { 451 $self->filled_box($gd, 452 $left, 453 $top, 454 $right, 455 $bottom, 456 $color,$color); 457 } 458 } 459 } 460} 461 462# BUG: this horrible subroutine has grown without control and needs 463# to be broken down into manageable subrutines. 464sub draw_multiple_alignment { 465 my $self = shift; 466 my $gd = shift; 467 my ($left,$top,$partno,$total_parts) = @_; 468 469 my $flipped = $self->flip; 470 my $ragged_extra = $self->option('ragged_start') 471 ? RAGGED_START_FUZZ : $self->option('ragged_extra'); 472 my $true_target = $self->option('true_target'); 473 my $show_mismatch = $self->show_mismatch; 474 my $do_realign = $self->option('realign'); 475 476 my $pixels_per_base = $self->scale; 477 my $feature = $self->feature; 478 479 my $panel = $self->panel; 480 my ($abs_start,$abs_end) = ($feature->start,$feature->end); 481 my ($tgt_start,$tgt_end) = ($feature->hit->start,$feature->hit->end); 482 my ($panel_start,$panel_end) = ($self->panel->start,$self->panel->end); 483 my $strand = $feature->strand; 484 my $panel_left = $self->panel->left; 485 my $panel_right = $self->panel->right; 486 my $bgcolor = $self->bgcolor; 487 488 my $drew_sequence; 489 490 if ($tgt_start > $tgt_end) { #correct for data problems 491 $strand = -1; 492 ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start); 493 } 494 495 warn "TGT_START..TGT_END = $tgt_start..$tgt_end" if DEBUG; 496 497 my ($bl,$bt,$br,$bb) = $self->bounds($left,$top); 498 $top = $bt; 499 500 my $stranded = $self->stranded; 501 502 my @s = $self->_subfeat($feature); 503 504 # FIX ME 505 # workaround for features in which top level feature does not have a hit but 506 # subfeatures do. There is total breakage of encapsulation here because sometimes 507 # a chado alignment places the aligned segment in the top-level feature, and sometimes 508 # in the child feature. 509 unless (@s) { # || $feature->isa('Bio::DB::GFF::Feature')) { 510 @s = ($feature); 511 } 512 513 my $can_realign; 514 if (Bio::Graphics::Browser2::Realign->can('align_segs')) { 515 $can_realign = \&Bio::Graphics::Browser2::Realign::align_segs; 516 } elsif (Bio::Graphics::Browser::Realign->can('align_segs')) { 517 $can_realign = \&Bio::Graphics::Browser::Realign::align_segs; 518 } 519 520 my (@segments,%strands); 521 my ($ref_dna,$tgt_dna); 522 523 for my $s (@s) { 524 525 my $target = $s->hit; 526 my ($src_start,$src_end) = ($s->start,$s->end); 527# next unless $src_start <= $panel_end && $src_end >= $panel_start; 528 529 my ($tgt_start,$tgt_end) = ($target->start,$target->end); 530 531 my $strand_bug; 532 unless (exists $strands{$target}) { 533 my $strand = $feature->strand; 534 if ($tgt_start > $tgt_end) { #correct for data problems 535 $strand = -1; 536 ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start); 537 $strand_bug++; 538 } 539 $strands{$target} = $strand; 540 } 541 542 my $cigar = $self->_get_cigar($s); 543 if ($cigar || ($can_realign && $do_realign)) { 544 ($ref_dna,$tgt_dna) = ($s->dna,$target->dna); 545 warn "$s: ",$s->seq_id,":",$s->start,'..',$s->end if DEBUG; 546 warn "ref/tgt" if DEBUG; 547 warn "$ref_dna\n$tgt_dna" if DEBUG; 548 549 my @exact_segments; 550 551 if ($cigar) { 552 warn "Segmenting [$target,$src_start,$src_end,$tgt_start,$tgt_end] via $cigar.\n" if DEBUG; 553 @exact_segments = $self->_gapped_alignment_to_segments($cigar,$ref_dna,$tgt_dna); 554 } 555 else { 556 warn "Realigning [$target,$src_start,$src_end,$tgt_start,$tgt_end].\n" if DEBUG; 557 @exact_segments = $can_realign->($ref_dna,$tgt_dna); 558 } 559 560 foreach (@exact_segments) { 561 warn "=========> [$target,@$_]\n" if DEBUG; 562 my $a = $strands{$target} >= 0 563 ? [$target,$_->[0]+$src_start,$_->[1]+$src_start,$_->[2]+$tgt_start,$_->[3]+$tgt_start] 564 : [$target,$src_end-$_->[1],$src_end-$_->[0],$_->[2]+$tgt_start,$_->[3]+$tgt_start]; 565 warn "[$target,$_->[0]+$src_start,$_->[1]+$src_start,$tgt_end-$_->[3],$tgt_end-$_->[2]]" if DEBUG; 566 warn "=========> [@$a]\n" if DEBUG; 567 warn substr($ref_dna, $_->[0],$_->[1]-$_->[0]+1),"\n" if DEBUG; 568 warn substr($tgt_dna,$_->[2],$_->[3]-$_->[2]+1),"\n" if DEBUG; 569 push @segments,$a; 570 } 571 } 572 else { 573 push @segments,[$target,$src_start,$src_end,$tgt_start,$tgt_end]; 574 } 575 } 576 577 # get 'em in the right order so that we don't have to worry about 578 # where the beginning and end are. 579 @segments = sort {$a->[TGT_START]<=>$b->[TGT_START]} @segments; 580 581 # adjust for ragged (nonaligned) ends 582 my ($offset_left,$offset_right) = (0,0); 583 if ($ragged_extra && $ragged_extra > 0) { 584 585 # add a little rag to the left end 586 $offset_left = $segments[0]->[TGT_START] > $ragged_extra ? $ragged_extra : $segments[0]->[TGT_START]-1; 587 if ($strand >= 0) { 588 $offset_left = $segments[0]->[SRC_START]-1 if $segments[0]->[SRC_START] - $offset_left < 1; 589 $abs_start -= $offset_left; 590 $tgt_start -= $offset_left; 591 $segments[0]->[SRC_START] -= $offset_left; 592 $segments[0]->[TGT_START] -= $offset_left; 593 } else { 594 $abs_end += $offset_left; 595 $tgt_start -= $offset_left; 596 $segments[0]->[SRC_END] += $offset_left; 597 $segments[0]->[TGT_START] -= $offset_left; 598 } 599 600 # add a little rag to the right end - this is complicated because 601 # we don't know what the length of the underlying dna is, so we 602 # use the subfeat method to find out 603 my $current_end = $segments[-1]->[TGT_END]; 604 $offset_right = length $segments[-1]->[TARGET]->subseq($current_end+1,$current_end+$ragged_extra)->seq; 605 if ($strand >= 0) { 606 $abs_end += $offset_right; 607 $tgt_end += $offset_left; 608 $segments[-1]->[TGT_END] += $offset_right; 609 $segments[-1]->[SRC_END] += $offset_right; 610 } else { 611 $abs_start -= $offset_right; 612 $tgt_end += $offset_left; 613 $segments[-1]->[TGT_END] += $offset_right; 614 $segments[-1]->[SRC_START] -= $offset_right; 615 } 616 } 617 618 # get the DNAs now - a little complicated by the necessity of using 619 # the subseq() method 620 $ref_dna ||= $feature->subseq(1-$offset_left,$feature->length+$offset_right)->seq; 621 622 # this may not be right if the alignment involves only a portion of the target DNA 623 $tgt_dna ||= $feature->hit->dna; 624 625 # none of these seem to be working properly with BAM alignments 626 # my $tgt_len = abs($segments[-1]->[TGT_END] - $segments[0]->[TGT_START]) + 1; 627 # my $tgt_dna = $feature->hit->subseq(1-$offset_left,$feature->length+$offset_right)->seq; 628 # my $tgt_dna = $feature->hit->subseq(1-$offset_left,$tgt_len+$offset_right)->seq; 629 630 # work around changes in the API 631 $ref_dna = $ref_dna->seq if ref $ref_dna and $ref_dna->can('seq'); 632 $tgt_dna = $tgt_dna->seq if ref $tgt_dna and $tgt_dna->can('seq'); 633 634 $ref_dna = lc $ref_dna; 635 $tgt_dna = lc $tgt_dna; 636 637 # sanity check. Let's see if they look like they're lining up 638 warn "$feature dna sanity check:\n$ref_dna\n$tgt_dna\n" if DEBUG; 639 640 # now we're all lined up, and we're going to adjust everything to fall within the bounds 641 # of the left and right panel coordinates 642 my %clip; 643 for my $seg (@segments) { 644 645 my $target = $seg->[TARGET]; 646 warn "preclip [@$seg]\n" if DEBUG; 647 648 # left clipping 649 if ( (my $delta = $seg->[SRC_START] - $panel_start) < 0 ) { 650 warn "clip left delta = $delta" if DEBUG; 651 $seg->[SRC_START] = $panel_start; 652 if ($strand >= 0) { 653 $seg->[TGT_START] -= $delta; 654 } 655 } 656 657 # right clipping 658 if ( (my $delta = $panel_end - $seg->[SRC_END]) < 0) { 659 warn "clip right delta = $delta" if DEBUG; 660 $seg->[SRC_END] = $panel_end; 661 if ($strand < 0) { 662 $seg->[TGT_START] -= $delta; 663 } 664 } 665 666 my $length = $seg->[SRC_END]-$seg->[SRC_START]+1; 667 $seg->[TGT_END] = $seg->[TGT_START]+$length-1; 668 669 warn "Clipping gives [@$seg], tgt_start = $tgt_start\n" if DEBUG; 670 } 671 672 # remove segments that got clipped out of existence 673 # no longer doing this because it interferes with ability to 674 # detect insertions in the target 675# @segments = grep { $_->[SRC_START]<=$_->[SRC_END] } @segments; 676 677 # relativize coordinates 678 if ($strand < 0) { 679# breaks BAM, but probably needed for non-BAM features 680 $ref_dna = $self->reversec($ref_dna) unless eval { $feature->reversed } ; 681 $tgt_dna = $self->reversec($tgt_dna); 682 } 683 684 my ($red,$green,$blue) = $self->panel->rgb($bgcolor); 685 my $avg = ($red+$green+$blue)/3; 686 my $color = $self->translate_color($avg > 128 ? 'black' : 'white'); 687 my $font = $self->mono_font; 688 my $lineheight = $font->height; 689 my $fontwidth = $font->width; 690 691 my $mismatch = $self->mismatch_color; 692 my $insertion= $self->insertion_color; 693 my $deletion = $self->deletion_color; 694 my $grey = $self->translate_color('gray'); 695 my $mismatch_font_color = eval { 696 my ($r,$g,$b) = $self->panel->rgb($mismatch); 697 $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white'); 698 }; 699 my $insertion_font_color = eval { 700 my ($r,$g,$b) = $self->panel->rgb($insertion); 701 $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white'); 702 }; 703 my $deletion_font_color = eval { 704 my ($r,$g,$b) = $self->panel->rgb($deletion); 705 $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white'); 706 }; 707 708 709 unless (@segments) { # this will happen if entire region is a target gap 710 for (my $i = $bl;$i<$br-$self->scale;$i+=$self->scale) { 711 $gd->char($font,$self->flip ? $i+$self->scale-4 : $i+2,$top,'-',$deletion_font_color); 712 } 713 return; 714 } 715 716 for my $seg (@segments) { 717 $seg->[SRC_START] -= $abs_start - 1; 718 $seg->[SRC_END] -= $abs_start - 1; 719 $seg->[TGT_START] -= $tgt_start - 1; 720 $seg->[TGT_END] -= $tgt_start - 1; 721 722 if ($strand < 0) { 723 ($seg->[TGT_START],$seg->[TGT_END]) = (length($tgt_dna)-$seg->[TGT_END]+1,length($tgt_dna)-$seg->[TGT_START]+1); 724 } 725 if (DEBUG) { 726 warn "$feature: relativized coordinates = [@$seg]\n"; 727 warn $self->_subsequence($ref_dna,$seg->[SRC_START],$seg->[SRC_END]),"\n"; 728 warn $self->_subsequence($tgt_dna,$seg->[TGT_START],$seg->[TGT_END]),"\n"; 729 } 730 } 731 732 # draw 733 my $base2pixel = 734 $self->flip ? 735 sub { 736 my ($src,$tgt) = @_; 737 my $a = $fontwidth + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1; 738 $panel_right - $a; 739 } 740 : sub { 741 my ($src,$tgt) = @_; 742 $fontwidth/2 + $left + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1; 743 }; 744 745 my $mismatch_only = $self->mismatch_only; 746 my ($tgt_last_end,$src_last_end,$leftmost,$rightmost,$gaps); 747 748 my $segment = 0; 749 750 for my $seg (sort {$a->[SRC_START]<=>$b->[SRC_START]} @segments) { 751 my $y = $top-1; 752 my $end = $seg->[SRC_END]-$seg->[SRC_START]; 753 754 for (my $i=0; $i<$end+1; $i++) { 755 my $src_base = $self->_subsequence($ref_dna,$seg->[SRC_START]+$i,$seg->[SRC_START]+$i); 756 my $tgt_base = $self->_subsequence($tgt_dna,$seg->[TGT_START]+$i,$seg->[TGT_START]+$i); 757 my $x = $base2pixel->($seg->[SRC_START],$i); 758 $leftmost = $x if !defined $leftmost || $leftmost > $x; 759 $rightmost= $x if !defined $rightmost || $rightmost < $x; 760 761 next unless $tgt_base && $x >= $panel_left && $x <= $panel_right; 762 763 my $is_mismatch = $show_mismatch && $tgt_base && $src_base ne $tgt_base && $tgt_base !~ /[nN]/; 764 $tgt_base = $complement{$tgt_base} if $true_target && $strand < 0; 765 $gd->char($font,$x,$y,$tgt_base,$tgt_base =~ /[nN]/ ? $grey 766 :$is_mismatch ? $mismatch_font_color 767 :$color) 768 unless $mismatch_only && !$is_mismatch; 769 770 $drew_sequence++; 771 } 772 773 # deal with gaps in the alignment 774 if (defined $src_last_end && (my $delta = $seg->[SRC_START] - $src_last_end) > 1) { 775 for (my $i=0;$i<$delta-1;$i++) { 776 my $x = $base2pixel->($src_last_end,$i+1); 777 next if $x > $panel_right; 778 next if $x < $panel_left; 779 $gd->char($font,$x,$y,'-',$deletion_font_color); 780 } 781 $gaps = $delta-1; 782 } 783 784 # indicate the presence of insertions in the target 785 $gaps ||= 0; 786 my $pos = $src_last_end + $gaps; 787 my $delta = $seg->[TGT_START] - $tgt_last_end; 788 my $src_delta = $seg->[SRC_START] - $src_last_end; 789 790 if ($segment && $delta && ($delta > $src_delta-$gaps)) { # an insertion in the target relative to the source 791 my $gap_left = $base2pixel->($pos+0.5,0); 792 my $gap_right = $base2pixel->($seg->[SRC_START],0); 793 ($gap_left,$gap_right) = ($gap_right+$fontwidth,$gap_left-$fontwidth) if $self->flip; 794 warn "delta=$delta, gap_left=$gap_left, gap_right=$gap_right" if DEBUG; 795 796 next if $gap_left <= $panel_left || $gap_right >= $panel_right; 797 798 my $length = $delta-1; 799 $length = 1 if $length <= 0; # workaround 800 my $gap_distance = $gap_right - $gap_left; 801 my $pixels_per_inserted_base = $gap_distance/$length; 802 803 if ($pixels_per_inserted_base >= $fontwidth) { # Squeeze the insertion in 804 for (my $i = 0; $i<$delta-1; $i++) { 805 my $x = $gap_left + $pixels_per_inserted_base * $i; 806 my $bp = $self->_subsequence($tgt_dna,$tgt_last_end+$i+1,$tgt_last_end+$i+1); 807 next if $x < $panel_left; 808 $gd->char($font,$x,$y,$bp,$color); 809 } 810 } else { #here's where we insert the insertion length 811 if ($gap_distance >= $fontwidth*length($length)) { 812 $gd->string($font,$gap_left,$y,$length,$color); 813 } 814 } 815 } 816 817 } continue { 818 $tgt_last_end = $seg->[TGT_END]; 819 $src_last_end = $seg->[SRC_END]; 820 $segment++; 821 } 822 823 return $drew_sequence; 824} 825 826sub _gapped_alignment_to_segments { 827 my $self = shift; 828 my ($cigar,$sdna,$tdna) = @_; 829 my ($pad_source,$pad_target,$pad_match); 830 warn "_gapped_alignment_to_segments\n$sdna\n$tdna" if DEBUG; 831 832 for my $event (@$cigar) { 833 my ($op,$count) = @$event; 834 warn "op=$op, count=$count" if DEBUG; 835 if ($op eq 'I') { 836 $pad_source .= '-' x $count; 837 $pad_target .= substr($tdna,0,$count,''); 838 $pad_match .= ' ' x $count; 839 } 840 elsif ($op eq 'D' || $op eq 'N') { 841 $pad_source .= substr($sdna,0,$count,''); 842 $pad_target .= '-' x $count; 843 $pad_match .= ' ' x $count; 844 } 845 elsif ($op eq 'S') { 846 $pad_source .= '-' x $count; 847 $pad_target .= substr($tdna,0,$count,''); 848 $pad_match .= ' ' x $count; 849 850 } 851 elsif ($op eq 'H' || $op eq 'P') { 852 # Nothing to do. This is simply an informational operation. 853 } else { # everything else is assumed to be a match -- revisit 854 $pad_source .= substr($sdna,0,$count,''); 855 $pad_target .= substr($tdna,0,$count,''); 856 $pad_match .= '|' x $count; 857 } 858 } 859 860 warn "pads:\n$pad_source\n$pad_match\n$pad_target" if DEBUG; 861 862 return $self->pads_to_segments($pad_source,$pad_match,$pad_target); 863} 864 865sub pads_to_segments { 866 my $self = shift; 867 my ($gap1,$align,$gap2) = @_; 868 warn "pads_to_segments" if DEBUG; 869 warn "$gap1\n$align\n$gap2\n" if DEBUG; 870 871 # create arrays that map residue positions to gap positions 872 my @maps; 873 for my $seq ($gap1,$gap2) { 874 my @seq = split '',$seq; 875 my @map; 876 my $residue = 0; 877 for (my $i=0;$i<@seq;$i++) { 878 $map[$i] = $residue; 879 $residue++ if $seq[$i] ne '-'; 880 } 881 push @maps,\@map; 882 } 883 884 my @result; 885 while ($align =~ /(\S+)/g) { 886 my $align_end = pos($align) - 1; 887 my $align_start = $align_end - length($1) + 1; 888 push @result,[@{$maps[0]}[$align_start,$align_end], 889 @{$maps[1]}[$align_start,$align_end]]; 890 } 891 return wantarray ? @result : \@result; 892} 893 894sub _get_cigar { 895 my $self = shift; 896 my $feat = shift; 897 898 # some features have this built in 899 if ($feat->can('cigar_array')) { 900 my $cigar = $feat->cigar_array; 901 @$cigar = reverse @$cigar if $feat->strand < 0; 902 return $cigar; 903 } 904 905 my ($cigar) = $feat->get_tag_values('Gap'); 906 return unless $cigar; 907 908 my @arry; 909 my $regexp = $cigar =~ /^\d+/ ? '(\d+)([A-Z])' 910 : '([A-Z])(\d+)'; 911 if ($cigar =~ /^\d+/) { 912 while ($cigar =~ /(\d+)([A-Z])/g) { 913 my ($count,$op) = ($1,$2); 914 push @arry,[$op,$count]; 915 } 916 } else { 917 while ($cigar =~ /([A-Z])(\d+)/g) { 918 my ($op,$count) = ($1,$2); 919 push @arry,[$op,$count]; 920 } 921 } 922 return \@arry; 923} 924 925sub _subsequence { 926 my $self = shift; 927 my ($seq,$start,$end,$strand) = @_; 928 my $sub; 929 if ((defined $strand && $strand < 0)) { 930 my $piece = substr($seq,length($seq)-$end,$end-$start+1); 931 $sub = $self->reversec($piece); 932 } else { 933 $sub = substr($seq,$start-1,$end-$start+1); 934 } 935 return $self->flip ? $complement{$sub} : $sub; 936} 937 938# draw the classic "i-beam" icon to indicate that an insertion fits between 939# two bases 940# sub _draw_insertion_point { 941# my $self = shift; 942# my ($gd,$x,$y,$color) = @_; 943# my $top = $y; 944# $x--; 945# my $bottom = $y + $self->font->height - 4; 946# $gd->line($x,$top+2, $x,$bottom-2,$color); 947# $gd->setPixel($x+1, $top+1,$color); 948# $gd->setPixel($x+$_, $top,$color) for (2..3); 949# $gd->setPixel($x-1, $top+1,$color); 950# $gd->setPixel($x-$_, $top,$color) for (2..3); 951 952# $gd->setPixel($x+1, $bottom-1,$color); 953# $gd->setPixel($x+$_, $bottom, $color) for (2..3); 954# $gd->setPixel($x-1, $bottom-1,$color); 955# $gd->setPixel($x-$_, $bottom, $color) for (2..3); 956# } 957 958# don't like that -- try drawing carets 959sub _draw_insertion_point { 960 my $self = shift; 961 my ($gd,$left,$right,$top,$bottom,$color) = @_; 962 963 my $poly = GD::Polygon->new(); 964 $poly->addPt($left-3,$top+1); 965 $poly->addPt($right+2,$top+1); 966 $poly->addPt(($left+$right)/2-1,$top+3); 967 $gd->filledPolygon($poly,$color); 968 969 $poly = GD::Polygon->new(); 970 $poly->addPt($left-3,$bottom); 971 $poly->addPt($right+2,$bottom); 972 $poly->addPt(($left+$right)/2-1,$bottom-2); 973 $gd->filledPolygon($poly,$color); 974} 975 9761; 977 978__END__ 979 980=head1 NAME 981 982Bio::Graphics::Glyph::segments - The "segments" glyph 983 984=head1 SYNOPSIS 985 986 See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>. 987 988=head1 DESCRIPTION 989 990This glyph is used for drawing features that consist of discontinuous 991segments. Unlike "graded_segments" or "alignment", the segments are a 992uniform color and not dependent on the score of the segment. 993 994=head2 METHODS 995 996This module overrides the maxdepth() method to return 1 unless 997explicitly specified by the -maxdepth option. This means that modules 998inheriting from segments will only be presented with one level of 999subfeatures. Override the maxdepth() method to get more levels. 1000 1001=head2 OPTIONS 1002 1003The following options are standard among all Glyphs. See 1004L<Bio::Graphics::Glyph> for a full explanation. 1005 1006 Option Description Default 1007 ------ ----------- ------- 1008 1009 -fgcolor Foreground color black 1010 1011 -outlinecolor Synonym for -fgcolor 1012 1013 -bgcolor Background color turquoise 1014 1015 -fillcolor Synonym for -bgcolor 1016 1017 -linewidth Line width 1 1018 1019 -height Height of glyph 10 1020 1021 -font Glyph font gdSmallFont 1022 1023 -connector Connector type 0 (false) 1024 1025 -connector_color 1026 Connector color black 1027 1028 -label Whether to draw a label 0 (false) 1029 1030 -description Whether to draw a description 0 (false) 1031 1032 -strand_arrow Whether to indicate 0 (false) 1033 strandedness 1034 1035 -hilite Highlight color undef (no color) 1036 1037In addition, the following glyph-specific options are recognized: 1038 1039 -draw_dna If true, draw the dna residues 0 (false) 1040 when magnification level 1041 allows. 1042 1043 -draw_target If true, draw the dna residues 0 (false) 1044 of the TARGET sequence when 1045 magnification level allows. 1046 See "Displaying Alignments". 1047 1048 -draw_protein_target If true, draw the protein residues 0 (false) 1049 of the TARGET sequence when 1050 magnification level allows. 1051 See "Displaying Alignments". 1052 1053 -ragged_extra When combined with -draw_target, 0 (false) 1054 draw extra bases beyond the end 1055 of the alignment. The value is 1056 the maximum number of extra 1057 bases. 1058 See "Displaying Alignments". 1059 1060 -ragged_start Deprecated option. Use 1061 -ragged_extra instead 1062 1063 -show_mismatch When combined with -draw_target, 0 (false) 1064 highlights mismatched bases in 1065 the mismatch color. 1066 Can be 0 (don't display); 1067 1 (display when the DNA fits); 1068 or another positive integer 1069 (display when the region in 1070 view is <= this value). 1071 See "Displaying Alignments". 1072 1073 -mismatch_only When combined with -draw_target, 0 (false) 1074 draws only the mismatched bases 1075 in the alignment. Implies 1076 -show_mismatch. 1077 See "Displaying Alignments". 1078 1079 -mismatch_color The mismatch color to use 'lightgrey' 1080 1081 -insertion_color The color to use for insertions 'green' 1082 relative to the reference. 1083 1084 -deletion_color The color to use for deletions 'red' 1085 relative to the reference. 1086 1087 -indel_color The color to use for indels, used 'lightgrey' 1088 only if -insertion_color or 1089 -deletion_color are absent 1090 1091 -true_target Show the target DNA in its native 0 (false) 1092 (plus strand) orientation, even if 1093 the alignment is to the minus strand. 1094 See "Displaying Alignments". 1095 1096 -realign Attempt to realign sequences at 0 (false) 1097 high mag to account for indels. 1098 See "Displaying Alignments". 1099 1100If the -draw_dna flag is set to a true value, then when the 1101magnification is high enough, the underlying DNA sequence will be 1102shown. This option is mutually exclusive with -draw_target. See 1103Bio::Graphics::Glyph::generic for more details. 1104 1105The -draw_target, -ragged_extra, and -show_mismatch options only work 1106with seqfeatures that implement the hit() method 1107(Bio::SeqFeature::SimilarityPair). -draw_target will cause the DNA of 1108the hit sequence to be displayed when the magnification is high enough 1109to allow individual bases to be drawn. The -ragged_extra option will 1110cause the alignment to be extended at the extreme ends by the 1111indicated number of bases, and is useful for looking for polyAs and 1112cloning sites at the ends of ESTs and cDNAs. -show_mismatch will cause 1113mismatched bases to be highlighted in with the color indicated by 1114-mismatch_color. A -show_mismatch value of "1" will highlight mismatches 1115only when the base pairs are displayed. A positive integer will cause 1116mismatches to be shown whenever the region in view is less than or equal 1117to the requested value. 1118 1119At high magnifications, minus strand matches will automatically be 1120shown as their reverse complement (so that the match has the same 1121sequence as the plus strand of the source dna). If you prefer to see 1122the actual sequence of the target as it appears on the minus strand, 1123then set -true_target to true. 1124 1125Note that -true_target has the opposite meaning from 1126-canonical_strand, which is used in conjunction with -draw_dna to draw 1127minus strand features as if they appear on the plus strand. 1128 1129=head2 Displaying Alignments 1130 1131When the B<-draw_target> option is true, this glyph can be used to 1132display nucleotide alignments such as BLAST, FASTA or BLAT 1133similarities. At high magnification, this glyph will attempt to show 1134how the sequence of the source (query) DNA matches the sequence of the 1135target (the hit). For this to work, the feature must implement the 1136hit() method, and both the source and the target DNA must be 1137available. If you pass the glyph a series of 1138Bio::SeqFeature::SimilarityPair objects, then these criteria will be 1139satisified. 1140 1141Without additional help, this glyph cannot display gapped alignments 1142correctly. To display gapped alignments, you can use the 1143Bio::Graphics::Brower::Realign module, which is part of the Generic 1144Genome Browser package (http://www.gmod.org). If you wish to install 1145the Realign module and not the rest of the package, here is the 1146recipe: 1147 1148 cd Generic-Genome-Browser-1.XX 1149 perl Makefile.PL DO_XS=1 1150 make 1151 make install_site 1152 1153If possible, build the gbrowse package with the DO_XS=1 option. This 1154compiles a C-based DP algorithm that both gbrowse and gbrowse_details 1155will use if they can. If DO_XS is not set, then the scripts will use 1156a Perl-based version of the algorithm that is 10-100 times slower. 1157 1158The display of alignments can be tweaked using the -ragged_extra, 1159-show_mismatch, -true_target, and -realign options. See the options 1160section for further details. 1161 1162There is also a B<-draw_protein_target> option, which is designed for 1163protein to nucleotide alignments. It draws the target sequence every 1164third base pair and is supposed to align correctly with the forward 1165and reverse translation glyphs. This option is experimental at the 1166moment, and may not work correctly, to use with care. 1167 1168=head1 BUGS 1169 1170Please report them. 1171 1172=head1 SEE ALSO 1173 1174 1175L<Bio::Graphics::Panel>, 1176L<Bio::Graphics::Glyph>, 1177L<Bio::Graphics::Glyph::arrow>, 1178L<Bio::Graphics::Glyph::cds>, 1179L<Bio::Graphics::Glyph::crossbox>, 1180L<Bio::Graphics::Glyph::diamond>, 1181L<Bio::Graphics::Glyph::dna>, 1182L<Bio::Graphics::Glyph::dot>, 1183L<Bio::Graphics::Glyph::ellipse>, 1184L<Bio::Graphics::Glyph::extending_arrow>, 1185L<Bio::Graphics::Glyph::generic>, 1186L<Bio::Graphics::Glyph::graded_segments>, 1187L<Bio::Graphics::Glyph::heterogeneous_segments>, 1188L<Bio::Graphics::Glyph::line>, 1189L<Bio::Graphics::Glyph::pinsertion>, 1190L<Bio::Graphics::Glyph::primers>, 1191L<Bio::Graphics::Glyph::rndrect>, 1192L<Bio::Graphics::Glyph::segments>, 1193L<Bio::Graphics::Glyph::ruler_arrow>, 1194L<Bio::Graphics::Glyph::toomany>, 1195L<Bio::Graphics::Glyph::transcript>, 1196L<Bio::Graphics::Glyph::transcript2>, 1197L<Bio::Graphics::Glyph::translation>, 1198L<Bio::Graphics::Glyph::triangle>, 1199L<Bio::DB::GFF>, 1200L<Bio::SeqI>, 1201L<Bio::SeqFeatureI>, 1202L<Bio::Das>, 1203L<GD> 1204 1205=head1 AUTHOR 1206 1207Lincoln Stein E<lt>lstein@cshl.orgE<gt> 1208 1209Copyright (c) 2001 Cold Spring Harbor Laboratory 1210 1211This library is free software; you can redistribute it and/or modify 1212it under the same terms as Perl itself. See DISCLAIMER.txt for 1213disclaimers of warranty. 1214 1215=cut 1216