1package Bio::Graphics::Glyph::segments;
2
3use strict;
4use Bio::Location::Simple;
5
6use constant RAGGED_START_FUZZ => 25;  # will show ragged ends of alignments
7                                       # up to this many bp.
8use constant DEBUG => 0;
9
10# These are just offsets into an array data structure
11use constant TARGET    => 0;
12use constant SRC_START => 1;
13use constant SRC_END   => 2;
14use constant TGT_START => 3;
15use constant TGT_END   => 4;
16
17eval { require Bio::Graphics::Browser2::Realign; 1; } || eval { require Bio::Graphics::Browser::Realign; };
18
19use base qw(Bio::Graphics::Glyph::segmented_keyglyph Bio::Graphics::Glyph::generic);
20
21my %complement = (g=>'c',a=>'t',t=>'a',c=>'g',n=>'n',
22		  G=>'C',A=>'T',T=>'A',C=>'G',N=>'N');
23
24sub my_description {
25    return <<END;
26This glyph draws multipart genomic features as a series of rectangles connected
27by solid lines. If the feature is attached to a DNA sequence, then the glyph will
28draw the sequence when magnification is high enough to show individual base pairs.
29The glyph is also capable of drawing genomic alignments, and performing protein
30translations.
31END
32}
33sub my_options {
34    {
35	draw_target => [
36	    'boolean',
37	    undef,
38	    'If true, draw the dna residues of the TARGET (aligned) sequence when the',
39	    'magnification level allows.',
40	    'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'
41	    ],
42	 draw_protein_target => [
43	     'boolean',
44	     undef,
45	     'If true, draw the protein residues of the TARGET (aligned) sequence when the',
46	     'magnification level allows.',
47	     'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
48	  ragged_extra => [
49	      'boolean',
50	      undef,
51	      'When combined with -draw_target, draw extra bases beyond the end',
52	      'of the alignment. The value is the maximum number of extra bases.',
53	      'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
54	  show_mismatch => [
55	      'integer',
56	      undef,
57	      'When combined with -draw_target, highlights mismatched bases in',
58	      'the mismatch color. A value of 0 or undef never shows mismatches.',
59	      'A value of 1 shows mismatches at the base pair alignment level, but',
60	      'not at magnifications too low to allow the DNA to be displayed.',
61	      'Any other positive integer will show mismatches when the track is showing',
62	      'a region less than or equal to the specified value.',
63	      'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
64	  mismatch_color => [
65	      'color',
66	      'lightgrey',
67	      'The color to use for mismatched bases when displaying alignments.',
68	      'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
69	  indel_color => [
70	      'color',
71	      'lightgrey',
72	      'The color to use for indels when displaying alignments.'],
73	  insertion_color => [
74	      'color',
75	      'green',
76	      'The color to use for insertions when displaying alignments; overrides indel_color'],
77	  deletion_color => [
78	      'color',
79	      'red',
80	      'The color to use for deletions when displaying alignments; overrides indel_color'],
81	  mismatch_only => [
82	      'boolean',
83	      undef,
84	      'If true, only print mismatched bases when displaying alignments.'],
85	  true_target => [
86	      'boolean',
87	      undef,
88	      'Show the target DNA in its native (plus strand) orientation, even',
89	      'if the alignment is to the minus strand.',
90	      'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
91	  split_on_cigar => [
92	      'boolean',
93	      undef,
94	      'If true, and if the feature contains a CIGAR string as the value of the Gap',
95	      'tag, then split the feature into subparts based on the CIGAR.'],
96	  realign => [
97	      'boolean',
98	      undef,
99	      'Attempt to realign sequences at high magnification to account',
100	      'for indels.',
101	      'See L<Bio::Graphics::Glyph::segments/"Displaying Alignments">.'],
102    }
103}
104
105sub mismatch_color {
106    my $self = shift;
107    my $c    = $self->option('mismatch_color') || 'lightgrey';
108    return $self->translate_color($c);
109}
110
111sub indel_color {
112    my $self = shift;
113    my $c    = $self->option('indel_color');
114    return $self->mismatch_color unless $c;
115    return $self->translate_color($c);
116}
117
118sub insertion_color {
119    my $self = shift;
120    my $c    = $self->option('insertion_color');
121    $c     ||= $self->option('indel_color');
122    $c     ||= $self->my_options->{insertion_color}[1];
123    return $self->translate_color($c);
124}
125
126sub deletion_color {
127    my $self = shift;
128    my $c    = $self->option('deletion_color');
129    $c     ||= $self->option('indel_color');
130    $c     ||= $self->my_options->{deletion_color}[1];
131    return $self->translate_color($c);
132}
133
134sub show_mismatch {
135    my $self = shift;
136    my $smm = $self->option('show_mismatch');
137    $smm  ||= 1 if $self->option('mismatch_only');
138    return unless $smm;
139    return 1 if $smm == 1 && $self->dna_fits;
140    return 1 if $smm >= $self->panel->length;
141}
142
143sub mismatch_only { shift->option('mismatch_only') }
144
145
146sub pad_left {
147  my $self = shift;
148  return $self->SUPER::pad_left unless $self->level > 0;
149  my $ragged = $self->option('ragged_start')
150    ? RAGGED_START_FUZZ
151    : $self->option('ragged_extra');
152
153  return $self->SUPER::pad_left
154    unless $self->draw_target && $ragged && $self->dna_fits;
155  my $extra = 0;
156  my $target = eval {$self->feature->hit} or return $self->SUPER::pad_left + $extra;
157  return $self->SUPER::pad_left + $extra unless $target->start<$target->end && $target->start < $ragged;
158  return ($target->start-1) * $self->scale + $extra;
159}
160
161sub pad_right {
162  my $self = shift;
163  return $self->SUPER::pad_right unless $self->level > 0;
164  my $ragged = $self->option('ragged_start')
165    ? RAGGED_START_FUZZ
166    : $self->option('ragged_extra');
167  return $self->SUPER::pad_right
168    unless $self->draw_target && $ragged && $self->dna_fits;
169  my $target = eval {$self->feature->hit} or return $self->SUPER::pad_right;
170  return $self->SUPER::pad_right unless $target->end < $target->start && $target->start < $ragged;
171  return ($target->end-1) * $self->scale;
172}
173
174sub labelwidth {
175  my $self = shift;
176  return $self->SUPER::labelwidth unless $self->draw_target && $self->dna_fits && $self->label_position eq 'left';
177  return $self->{labelwidth} ||= (length($self->label||'')+1) * $self->mono_font->width;
178}
179sub draw_target {
180  my $self = shift;
181  return if $self->option('draw_dna');
182  return $self->option('draw_target');
183}
184
185sub draw_protein_target {
186  my $self = shift;
187  return if $self->option('draw_protein');
188  return $self->option('draw_protein_target');
189  return $self->option('draw_target');
190}
191
192sub height {
193  my $self = shift;
194  my $height = $self->SUPER::height;
195  return $height unless $self->draw_target || $self->draw_protein_target;
196  if ($self->draw_target) {
197    return $height unless $self->dna_fits;
198  }
199  if ($self->draw_protein_target) {
200    return $height unless $self->protein_fits;
201  }
202  my $fontheight = $self->mono_font->height;
203  return $fontheight if $fontheight > $height;
204}
205
206# group sets connector to 'solid'
207sub connector {
208  my $self = shift;
209  return $self->SUPER::connector(@_) if $self->all_callbacks;
210  return ($self->SUPER::connector(@_) || 'solid');
211}
212
213# never allow our components to bump
214sub bump {
215  my $self = shift;
216  my $bump = $self->SUPER::bump(@_);
217  return $bump if $self->all_callbacks;
218  return $self->parts_overlap ? $bump : 0;
219}
220
221sub maxdepth {
222  my $self = shift;
223  my $md   = $self->Bio::Graphics::Glyph::maxdepth;
224  return $md if defined $md;
225  return 1;
226}
227
228# this was willfully confusing
229#sub fontcolor {
230#  my $self = shift;
231#  return $self->SUPER::fontcolor unless $self->draw_target;# || $self->option('draw_dna');
232#  return $self->SUPER::fontcolor unless $self->dna_fits;
233#  return $self->bgcolor;
234#}
235
236# Override _subfeat() method to make it appear that a top-level feature that
237# has no subfeatures appears as a feature that has a single subfeature.
238# Otherwise at high mags gaps will be drawn as components rather than
239# as connectors.  Because of differing representations of split features
240# in Bio::DB::GFF::Feature and Bio::SeqFeature::Generic, there is
241# some breakage of encapsulation here.
242sub _subfeat {
243    my $self    = shift;
244    my $feature = shift;
245
246    my @subfeat = $self->SUPER::_subfeat($feature);
247
248    if (!@subfeat && $self->option('split_on_cigar')) {
249	my $cigar   = $self->_get_cigar($feature);
250	if ($cigar && @$cigar) {
251	    return $self->_split_on_cigar($feature,$cigar);
252	}
253    }
254
255    return @subfeat if @subfeat;
256    if ($self->level == 0 && !@subfeat && !$self->feature_has_subparts) {
257	return $self->feature;
258    } else {
259	return;
260    }
261}
262
263sub _split_on_cigar {
264    my $self = shift;
265    my ($feature,$cigar) = @_;
266
267    my $source_start = $feature->start;
268    my $source_end   = $feature->end;
269    my $ss  = $feature->strand;
270    my $ts  = $feature->hit->strand;
271
272    # Render -/- alignments on the + strand
273    if ($ss == -1 && $ts == -1) {
274        $ss = 1;
275    }
276
277    my $target_start = eval {$feature->hit->start} || return $feature;
278
279    my (@parts);
280
281	for my $event (@$cigar) {
282	    my ($op,$count) = @$event;
283	    if ($op eq 'I' || $op eq 'S' || $op eq 'H') {
284		$target_start += $count;
285	    }
286	    elsif ($op eq 'D' || $op eq 'N') {
287		$source_start += $count;
288	    }
289	    elsif ($op eq 'P') {
290		# Do NOTHING for pads. Irrelevant for pairwise
291		# alignments, since we cannot show the pad in
292		# the reference sequence
293	    } else {  # everything else is assumed to be a match -- revisit
294		push @parts,[$source_start,$source_start+$count-1,
295			     $target_start,$target_start+$count-1];
296		$source_start += $count;
297		$target_start += $count;
298	    }
299	}
300
301    my $id  = $feature->seq_id;
302    my $tid = $feature->hit->seq_id;
303    my @result = map {
304	my ($s1,$s2,$t1,$t2) = @$_;
305	my $s = Bio::Graphics::Feature->new(-seq_id=> $id,
306					    -start => $s1,
307					    -end   => $s2,
308					    -strand => $ss,
309	    );
310	my $h = Bio::Graphics::Feature->new(-seq_id=> $tid,
311					    -start => $t1,
312					    -end   => $t2,
313					    -strand => $ts,
314	    );
315	$s->add_hit($h);
316	$s;
317    } @parts;
318    return @result;
319}
320
321sub draw {
322  my $self = shift;
323
324  my $draw_target         = $self->draw_target && $self->dna_fits && eval {$self->feature->hit->seq};
325
326  $self->SUPER::draw(@_);
327
328  return if $self->feature_has_subparts;
329  return unless $draw_target;
330
331  my $drew_sequence;
332  $drew_sequence = $self->draw_multiple_alignment(@_);
333
334  my ($gd,$x,$y) = @_;
335  $y  += $self->top + $self->pad_top if $drew_sequence;  # something is wrong - this is a hack/workaround
336  my $connector     =  $self->connector;
337  $self->draw_connectors($gd,$x,$y)
338    if $connector && $connector ne 'none' && $self->level == 0;
339}
340
341sub draw_component {
342    my $self = shift;
343    my ($gd,$left,$top,$partno,$total_parts) = @_;
344    my ($x1,$y1,$x2,$y2) = $self->bounds($left,$top);
345
346    my $draw_target;
347    my $strand   = $self->feature->strand;
348
349    if ($self->draw_target && $self->dna_fits) {
350	$draw_target++;
351	my $stranded = $self->stranded;
352	my $bgcolor  = $self->bgcolor;
353	if ($stranded) {
354	    $x1 -= 6 if $strand < 0 && $x1 >= $self->panel->left;
355	    $x2 += 6 if $strand > 0 && $x2 <= $self->panel->right;
356	    $self->filled_arrow($gd,$strand,$x1,$y1,$x2,$y2)
357	} else {
358	    $self->filled_box($gd,$x1,$y1,$x2,$y2,$bgcolor,$bgcolor);
359	}
360    } else {
361	$self->SUPER::draw_component(@_);
362    }
363
364    return unless $self->show_mismatch;
365    my $mismatch_color = $self->mismatch_color;
366    my $feature = $self->feature;
367    my $start   = $self->feature->start;
368    my $end     = $feature->end;
369    my (@mismatch_positions,@del_positions,@in_positions);
370
371    if (my ($src,$matchstr,$tgt) = eval{$feature->padded_alignment}) {
372	my @src   = split '',$src;
373	my @match = split '',$matchstr;
374	my @tgt   = split '',$tgt;
375	my $pos   = $start;
376
377	# skip over src padding (probably soft clipped)
378	while ($src[0] eq '-') {
379	    shift @src;
380	    shift @tgt;
381	}
382	while ($src[-1] eq '-') {
383	    pop @src;
384	    pop @tgt;
385	}
386
387	for (my $i=0;$i<@src;$i++) {
388	    if ($src[$i] eq '-') {
389		push @in_positions,$pos;
390	    }
391	    elsif ($tgt[$i] eq '-') {
392		push @del_positions,$pos;
393		$pos++;
394	    } elsif ($src[$i] ne $tgt[$i]) {
395		push @mismatch_positions,$pos;
396		$pos++;
397	    } else {
398		$pos++;
399	    }
400	}
401    }
402
403    else {
404	my $sdna = eval {$feature->dna};
405	my $tdna = eval {$feature->target->dna};  # works with GFF files
406
407	return unless $sdna =~ /[gatc]/i;
408 	return unless $tdna =~ /[gatc]/i;
409
410	my @src = split '',$sdna;
411	my @tgt = split '',$tdna;
412	for (my $i=0;$i<@src;$i++) {
413	    next if $src[$i] eq $tgt[$i];
414	    warn "$src[$i] eq $tgt[$i], strand=$strand";
415	    my $pos = $strand >= 0 ? $i+$start : $end-$i;
416	    push @mismatch_positions,$pos;
417	}
418    }
419
420    my $pixels_per_base      = $self->scale;
421    my $panel_right          = $self->panel->right;
422
423    for my $a ([\@mismatch_positions,$self->mismatch_color],
424	       [\@del_positions,$self->deletion_color],
425	       [\@in_positions,$self->insertion_color,0.5,0.5]
426	) {
427
428	my $color            = $a->[1];
429	my $offset           = $a->[2]||0;
430	my $width            = $a->[3]||1;
431	my @pixel_positions = $self->map_no_trunc(@{$a->[0]});
432
433	foreach (@pixel_positions) {
434	    next if $_ < $x1;
435	    next if $_ > $x2;
436	    next if $_ >= $panel_right;
437	    my $left  = $_ - $pixels_per_base*$offset;
438	    my $right = $left+($width*$pixels_per_base);
439	    my $top   = $y1+1;
440	    my $bottom= $y2-1;
441	    my $middle= ($y1+$y2)/2;
442	    if ($self->stranded && $left <= $x1+$pixels_per_base-1 && $self->strand < 0) {
443		$self->filled_arrow($gd,$self->strand,
444				    $draw_target ? ($left-4):$left+2,
445				    $top,
446				    $draw_target ? $right:$right+5,$bottom,$color,$color,1);
447	    } elsif ($self->stranded && $right >= $x2-$pixels_per_base+1 && $self->strand > 0) {
448		$self->filled_arrow($gd,$self->strand,
449				    $left,$top,$draw_target ? ($right+4): $right-2,$bottom,$color,$color,1);
450	    } else {
451		$self->filled_box($gd,
452				  $left,
453				  $top,
454				  $right,
455				  $bottom,
456				  $color,$color);
457	    }
458	}
459    }
460}
461
462# BUG: this horrible subroutine has grown without control and needs
463# to be broken down into manageable subrutines.
464sub draw_multiple_alignment {
465  my $self = shift;
466  my $gd   = shift;
467  my ($left,$top,$partno,$total_parts) = @_;
468
469  my $flipped              = $self->flip;
470  my $ragged_extra         = $self->option('ragged_start')
471                               ? RAGGED_START_FUZZ : $self->option('ragged_extra');
472  my $true_target          = $self->option('true_target');
473  my $show_mismatch        = $self->show_mismatch;
474  my $do_realign           = $self->option('realign');
475
476  my $pixels_per_base      = $self->scale;
477  my $feature              = $self->feature;
478
479  my $panel                = $self->panel;
480  my ($abs_start,$abs_end)     = ($feature->start,$feature->end);
481  my ($tgt_start,$tgt_end)     = ($feature->hit->start,$feature->hit->end);
482  my ($panel_start,$panel_end) = ($self->panel->start,$self->panel->end);
483  my $strand               = $feature->strand;
484  my $panel_left           = $self->panel->left;
485  my $panel_right          = $self->panel->right;
486  my $bgcolor              = $self->bgcolor;
487
488  my $drew_sequence;
489
490  if ($tgt_start > $tgt_end) { #correct for data problems
491    $strand    = -1;
492    ($tgt_start,$tgt_end) = ($tgt_end,$tgt_start);
493  }
494
495  warn "TGT_START..TGT_END = $tgt_start..$tgt_end" if DEBUG;
496
497  my ($bl,$bt,$br,$bb)     = $self->bounds($left,$top);
498  $top = $bt;
499
500  my $stranded = $self->stranded;
501
502  my @s                     = $self->_subfeat($feature);
503
504  # FIX ME
505  # workaround for features in which top level feature does not have a hit but
506  # subfeatures do. There is total breakage of encapsulation here because sometimes
507  # a chado alignment places the aligned segment in the top-level feature, and sometimes
508  # in the child feature.
509  unless (@s) {            # || $feature->isa('Bio::DB::GFF::Feature')) {
510    @s = ($feature);
511  }
512
513  my $can_realign;
514  if (Bio::Graphics::Browser2::Realign->can('align_segs')) {
515      $can_realign   = \&Bio::Graphics::Browser2::Realign::align_segs;
516  } elsif (Bio::Graphics::Browser::Realign->can('align_segs')) {
517      $can_realign   = \&Bio::Graphics::Browser::Realign::align_segs;
518  }
519
520  my (@segments,%strands);
521  my ($ref_dna,$tgt_dna);
522
523  for my $s (@s) {
524
525    my $target = $s->hit;
526    my ($src_start,$src_end) = ($s->start,$s->end);
527#    next unless $src_start <= $panel_end && $src_end >= $panel_start;
528
529    my ($tgt_start,$tgt_end) = ($target->start,$target->end);
530
531    my $strand_bug;
532    unless (exists $strands{$target}) {
533      my $strand = $feature->strand;
534      if ($tgt_start > $tgt_end) { #correct for data problems
535	$strand    = -1;
536	($tgt_start,$tgt_end) = ($tgt_end,$tgt_start);
537	$strand_bug++;
538      }
539      $strands{$target} = $strand;
540    }
541
542    my $cigar = $self->_get_cigar($s);
543    if ($cigar || ($can_realign && $do_realign)) {
544	($ref_dna,$tgt_dna) = ($s->dna,$target->dna);
545	warn "$s: ",$s->seq_id,":",$s->start,'..',$s->end if DEBUG;
546	warn "ref/tgt"             if DEBUG;
547	warn "$ref_dna\n$tgt_dna"  if DEBUG;
548
549	my @exact_segments;
550
551	if ($cigar) {
552	    warn   "Segmenting [$target,$src_start,$src_end,$tgt_start,$tgt_end] via $cigar.\n" if DEBUG;
553	    @exact_segments = $self->_gapped_alignment_to_segments($cigar,$ref_dna,$tgt_dna);
554	}
555	else {
556	    warn   "Realigning [$target,$src_start,$src_end,$tgt_start,$tgt_end].\n" if DEBUG;
557	    @exact_segments = $can_realign->($ref_dna,$tgt_dna);
558	}
559
560	foreach (@exact_segments) {
561	    warn "=========> [$target,@$_]\n" if DEBUG;
562	    my $a = $strands{$target} >= 0
563		? [$target,$_->[0]+$src_start,$_->[1]+$src_start,$_->[2]+$tgt_start,$_->[3]+$tgt_start]
564		: [$target,$src_end-$_->[1],$src_end-$_->[0],$_->[2]+$tgt_start,$_->[3]+$tgt_start];
565	    warn "[$target,$_->[0]+$src_start,$_->[1]+$src_start,$tgt_end-$_->[3],$tgt_end-$_->[2]]" if DEBUG;
566	    warn "=========> [@$a]\n" if DEBUG;
567	    warn substr($ref_dna,     $_->[0],$_->[1]-$_->[0]+1),"\n" if DEBUG;
568	    warn substr($tgt_dna,$_->[2],$_->[3]-$_->[2]+1),"\n"      if DEBUG;
569	    push @segments,$a;
570	}
571    }
572    else {
573	push @segments,[$target,$src_start,$src_end,$tgt_start,$tgt_end];
574    }
575  }
576
577  # get 'em in the right order so that we don't have to worry about
578  # where the beginning and end are.
579  @segments = sort {$a->[TGT_START]<=>$b->[TGT_START]} @segments;
580
581  # adjust for ragged (nonaligned) ends
582  my ($offset_left,$offset_right) = (0,0);
583  if ($ragged_extra && $ragged_extra > 0) {
584
585    # add a little rag to the left end
586    $offset_left = $segments[0]->[TGT_START] > $ragged_extra ? $ragged_extra : $segments[0]->[TGT_START]-1;
587    if ($strand >= 0) {
588      $offset_left     = $segments[0]->[SRC_START]-1 if $segments[0]->[SRC_START] - $offset_left < 1;
589      $abs_start                -= $offset_left;
590      $tgt_start                -= $offset_left;
591      $segments[0]->[SRC_START] -= $offset_left;
592      $segments[0]->[TGT_START] -= $offset_left;
593    } else {
594      $abs_end                  += $offset_left;
595      $tgt_start                -= $offset_left;
596      $segments[0]->[SRC_END]   += $offset_left;
597      $segments[0]->[TGT_START] -= $offset_left;
598    }
599
600    # add a little rag to the right end - this is complicated because
601    # we don't know what the length of the underlying dna is, so we
602    # use the subfeat method to find out
603    my $current_end        = $segments[-1]->[TGT_END];
604    $offset_right          = length $segments[-1]->[TARGET]->subseq($current_end+1,$current_end+$ragged_extra)->seq;
605    if ($strand >= 0) {
606      $abs_end                 += $offset_right;
607      $tgt_end                 += $offset_left;
608      $segments[-1]->[TGT_END] += $offset_right;
609      $segments[-1]->[SRC_END] += $offset_right;
610    } else {
611      $abs_start                 -= $offset_right;
612      $tgt_end                   += $offset_left;
613      $segments[-1]->[TGT_END]   += $offset_right;
614      $segments[-1]->[SRC_START] -= $offset_right;
615    }
616  }
617
618  # get the DNAs now - a little complicated by the necessity of using
619  # the subseq() method
620  $ref_dna ||= $feature->subseq(1-$offset_left,$feature->length+$offset_right)->seq;
621
622  # this may not be right if the alignment involves only a portion of the target DNA
623  $tgt_dna ||= $feature->hit->dna;
624
625  # none of these seem to be working properly with BAM alignments
626  # my $tgt_len = abs($segments[-1]->[TGT_END] - $segments[0]->[TGT_START]) + 1;
627  # my $tgt_dna = $feature->hit->subseq(1-$offset_left,$feature->length+$offset_right)->seq;
628  # my $tgt_dna = $feature->hit->subseq(1-$offset_left,$tgt_len+$offset_right)->seq;
629
630  # work around changes in the API
631  $ref_dna    = $ref_dna->seq if ref $ref_dna and $ref_dna->can('seq');
632  $tgt_dna    = $tgt_dna->seq if ref $tgt_dna and $tgt_dna->can('seq');
633
634  $ref_dna    = lc $ref_dna;
635  $tgt_dna    = lc $tgt_dna;
636
637  # sanity check.  Let's see if they look like they're lining up
638  warn "$feature dna sanity check:\n$ref_dna\n$tgt_dna\n" if DEBUG;
639
640  # now we're all lined up, and we're going to adjust everything to fall within the bounds
641  # of the left and right panel coordinates
642  my %clip;
643  for my $seg (@segments) {
644
645    my $target = $seg->[TARGET];
646    warn "preclip [@$seg]\n" if DEBUG;
647
648    # left clipping
649    if ( (my $delta = $seg->[SRC_START] - $panel_start) < 0 ) {
650      warn "clip left delta = $delta" if DEBUG;
651      $seg->[SRC_START] = $panel_start;
652      if ($strand >= 0) {
653	$seg->[TGT_START] -= $delta;
654      }
655    }
656
657    # right clipping
658    if ( (my $delta = $panel_end - $seg->[SRC_END]) < 0) {
659      warn "clip right delta = $delta" if DEBUG;
660      $seg->[SRC_END] = $panel_end;
661      if ($strand < 0) {
662	$seg->[TGT_START] -= $delta;
663      }
664    }
665
666    my $length = $seg->[SRC_END]-$seg->[SRC_START]+1;
667    $seg->[TGT_END] = $seg->[TGT_START]+$length-1;
668
669    warn "Clipping gives [@$seg], tgt_start = $tgt_start\n" if DEBUG;
670  }
671
672  # remove segments that got clipped out of existence
673  # no longer doing this because it interferes with ability to
674  # detect insertions in the target
675#  @segments = grep { $_->[SRC_START]<=$_->[SRC_END] } @segments;
676
677  # relativize coordinates
678  if ($strand < 0) {
679# breaks BAM, but probably needed for non-BAM features
680    $ref_dna = $self->reversec($ref_dna) unless eval { $feature->reversed } ;
681    $tgt_dna = $self->reversec($tgt_dna);
682  }
683
684  my ($red,$green,$blue)   = $self->panel->rgb($bgcolor);
685  my $avg         = ($red+$green+$blue)/3;
686  my $color       = $self->translate_color($avg > 128 ? 'black' : 'white');
687  my $font       = $self->mono_font;
688  my $lineheight = $font->height;
689  my $fontwidth  = $font->width;
690
691  my $mismatch = $self->mismatch_color;
692  my $insertion= $self->insertion_color;
693  my $deletion = $self->deletion_color;
694  my $grey     = $self->translate_color('gray');
695  my $mismatch_font_color = eval {
696      my ($r,$g,$b) = $self->panel->rgb($mismatch);
697      $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white');
698  };
699  my $insertion_font_color = eval {
700      my ($r,$g,$b) = $self->panel->rgb($insertion);
701      $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white');
702  };
703  my $deletion_font_color = eval {
704      my ($r,$g,$b) = $self->panel->rgb($deletion);
705      $self->translate_color(($r+$g+$b)>128 ? 'black' : 'white');
706  };
707
708
709  unless (@segments) { # this will happen if entire region is a target gap
710      for (my $i = $bl;$i<$br-$self->scale;$i+=$self->scale) {
711	  $gd->char($font,$self->flip ? $i+$self->scale-4 : $i+2,$top,'-',$deletion_font_color);
712      }
713      return;
714  }
715
716  for my $seg (@segments) {
717    $seg->[SRC_START] -= $abs_start - 1;
718    $seg->[SRC_END]   -= $abs_start - 1;
719    $seg->[TGT_START] -= $tgt_start - 1;
720    $seg->[TGT_END]   -= $tgt_start - 1;
721
722    if ($strand < 0) {
723      ($seg->[TGT_START],$seg->[TGT_END]) = (length($tgt_dna)-$seg->[TGT_END]+1,length($tgt_dna)-$seg->[TGT_START]+1);
724    }
725    if (DEBUG) {
726      warn "$feature: relativized coordinates = [@$seg]\n";
727      warn $self->_subsequence($ref_dna,$seg->[SRC_START],$seg->[SRC_END]),"\n";
728      warn $self->_subsequence($tgt_dna,$seg->[TGT_START],$seg->[TGT_END]),"\n";
729    }
730  }
731
732  # draw
733  my $base2pixel =
734    $self->flip ?
735      sub {
736	my ($src,$tgt) = @_;
737	my $a = $fontwidth + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1;
738	$panel_right - $a;
739      }
740      : sub {
741	my ($src,$tgt) = @_;
742	$fontwidth/2 + $left + ($abs_start + $src-$panel_start-1 + $tgt) * $pixels_per_base - 1;
743      };
744
745  my $mismatch_only = $self->mismatch_only;
746  my ($tgt_last_end,$src_last_end,$leftmost,$rightmost,$gaps);
747
748  my $segment = 0;
749
750  for my $seg (sort {$a->[SRC_START]<=>$b->[SRC_START]} @segments) {
751    my $y = $top-1;
752    my $end = $seg->[SRC_END]-$seg->[SRC_START];
753
754    for (my $i=0; $i<$end+1; $i++) {
755      my $src_base = $self->_subsequence($ref_dna,$seg->[SRC_START]+$i,$seg->[SRC_START]+$i);
756      my $tgt_base = $self->_subsequence($tgt_dna,$seg->[TGT_START]+$i,$seg->[TGT_START]+$i);
757      my $x = $base2pixel->($seg->[SRC_START],$i);
758      $leftmost = $x if !defined $leftmost  || $leftmost  > $x;
759      $rightmost= $x if !defined $rightmost || $rightmost < $x;
760
761      next unless $tgt_base && $x >= $panel_left && $x <= $panel_right;
762
763      my $is_mismatch = $show_mismatch && $tgt_base && $src_base ne $tgt_base && $tgt_base !~ /[nN]/;
764      $tgt_base = $complement{$tgt_base} if $true_target && $strand < 0;
765      $gd->char($font,$x,$y,$tgt_base,$tgt_base =~ /[nN]/ ? $grey
766		                     :$is_mismatch        ? $mismatch_font_color
767	                             :$color)
768	  unless $mismatch_only && !$is_mismatch;
769
770      $drew_sequence++;
771    }
772
773    # deal with gaps in the alignment
774    if (defined $src_last_end && (my $delta = $seg->[SRC_START] - $src_last_end) > 1) {
775	for (my $i=0;$i<$delta-1;$i++) {
776	    my $x = $base2pixel->($src_last_end,$i+1);
777	    next if $x > $panel_right;
778	    next if $x < $panel_left;
779	    $gd->char($font,$x,$y,'-',$deletion_font_color);
780	}
781	$gaps = $delta-1;
782    }
783
784    # indicate the presence of insertions in the target
785    $gaps       ||= 0;
786    my $pos       = $src_last_end + $gaps;
787    my $delta     = $seg->[TGT_START] - $tgt_last_end;
788    my $src_delta = $seg->[SRC_START] - $src_last_end;
789
790    if ($segment && $delta && ($delta > $src_delta-$gaps)) {  # an insertion in the target relative to the source
791	my $gap_left  = $base2pixel->($pos+0.5,0);
792	my $gap_right = $base2pixel->($seg->[SRC_START],0);
793	($gap_left,$gap_right) = ($gap_right+$fontwidth,$gap_left-$fontwidth) if $self->flip;
794	warn "delta=$delta, gap_left=$gap_left, gap_right=$gap_right" if DEBUG;
795
796	next if $gap_left <= $panel_left || $gap_right >= $panel_right;
797
798	my $length = $delta-1;
799	$length    = 1 if $length <= 0;  # workaround
800	my $gap_distance   = $gap_right - $gap_left;
801	my $pixels_per_inserted_base = $gap_distance/$length;
802
803	if ($pixels_per_inserted_base >= $fontwidth) {  # Squeeze the insertion in
804	    for (my $i = 0; $i<$delta-1; $i++) {
805		my $x = $gap_left + $pixels_per_inserted_base * $i;
806		my $bp = $self->_subsequence($tgt_dna,$tgt_last_end+$i+1,$tgt_last_end+$i+1);
807		next if $x < $panel_left;
808		$gd->char($font,$x,$y,$bp,$color);
809	    }
810	} else {  #here's where we insert the insertion length
811	    if ($gap_distance >= $fontwidth*length($length)) {
812		$gd->string($font,$gap_left,$y,$length,$color);
813	    }
814	}
815    }
816
817  } continue {
818    $tgt_last_end  = $seg->[TGT_END];
819    $src_last_end  = $seg->[SRC_END];
820    $segment++;
821  }
822
823  return $drew_sequence;
824}
825
826sub _gapped_alignment_to_segments {
827    my $self = shift;
828    my ($cigar,$sdna,$tdna) = @_;
829    my ($pad_source,$pad_target,$pad_match);
830    warn "_gapped_alignment_to_segments\n$sdna\n$tdna" if DEBUG;
831
832    for my $event (@$cigar) {
833	my ($op,$count) = @$event;
834	warn "op=$op, count=$count" if DEBUG;
835	if ($op eq 'I') {
836	    $pad_source .= '-' x $count;
837	    $pad_target .= substr($tdna,0,$count,'');
838	    $pad_match  .= ' ' x $count;
839	}
840	elsif ($op eq 'D' || $op eq 'N') {
841	    $pad_source .= substr($sdna,0,$count,'');
842	    $pad_target .= '-' x $count;
843	    $pad_match  .= ' ' x $count;
844	}
845	elsif ($op eq 'S') {
846	    $pad_source .= '-' x $count;
847	    $pad_target .= substr($tdna,0,$count,'');
848	    $pad_match  .= ' ' x $count;
849
850	}
851	elsif ($op eq 'H' || $op eq 'P') {
852	    # Nothing to do. This is simply an informational operation.
853	} else {  # everything else is assumed to be a match -- revisit
854	    $pad_source .= substr($sdna,0,$count,'');
855	    $pad_target .= substr($tdna,0,$count,'');
856	    $pad_match  .= '|' x $count;
857	}
858    }
859
860    warn "pads:\n$pad_source\n$pad_match\n$pad_target" if DEBUG;
861
862    return $self->pads_to_segments($pad_source,$pad_match,$pad_target);
863}
864
865sub pads_to_segments {
866    my $self = shift;
867    my ($gap1,$align,$gap2) = @_;
868    warn "pads_to_segments" if DEBUG;
869    warn "$gap1\n$align\n$gap2\n" if DEBUG;
870
871    # create arrays that map residue positions to gap positions
872    my @maps;
873    for my $seq ($gap1,$gap2) {
874	my @seq = split '',$seq;
875	my @map;
876	my $residue = 0;
877	for (my $i=0;$i<@seq;$i++) {
878	    $map[$i] = $residue;
879	    $residue++ if $seq[$i] ne '-';
880	}
881	push @maps,\@map;
882    }
883
884    my @result;
885    while ($align =~ /(\S+)/g) {
886	my $align_end   = pos($align) - 1;
887	my $align_start = $align_end  - length($1) + 1;
888	push @result,[@{$maps[0]}[$align_start,$align_end],
889		      @{$maps[1]}[$align_start,$align_end]];
890    }
891    return wantarray ? @result : \@result;
892}
893
894sub _get_cigar {
895    my $self = shift;
896    my $feat = shift;
897
898    # some features have this built in
899    if ($feat->can('cigar_array')) {
900	my $cigar = $feat->cigar_array;
901	@$cigar = reverse @$cigar if $feat->strand < 0;
902	return $cigar;
903    }
904
905    my ($cigar) = $feat->get_tag_values('Gap');
906    return unless $cigar;
907
908    my @arry;
909    my $regexp = $cigar =~ /^\d+/ ? '(\d+)([A-Z])'
910	                          : '([A-Z])(\d+)';
911    if ($cigar =~ /^\d+/) {
912	while ($cigar =~ /(\d+)([A-Z])/g) {
913	    my ($count,$op) = ($1,$2);
914	    push @arry,[$op,$count];
915	}
916    } else {
917	while ($cigar =~ /([A-Z])(\d+)/g) {
918	    my ($op,$count) = ($1,$2);
919	    push @arry,[$op,$count];
920	}
921    }
922    return \@arry;
923}
924
925sub _subsequence {
926  my $self = shift;
927  my ($seq,$start,$end,$strand) = @_;
928  my $sub;
929  if ((defined $strand && $strand < 0)) {
930    my $piece = substr($seq,length($seq)-$end,$end-$start+1);
931    $sub = $self->reversec($piece);
932  } else {
933    $sub = substr($seq,$start-1,$end-$start+1);
934  }
935  return $self->flip ? $complement{$sub} : $sub;
936}
937
938# draw the classic "i-beam" icon to indicate that an insertion fits between
939# two bases
940# sub _draw_insertion_point {
941#   my $self = shift;
942#   my ($gd,$x,$y,$color) = @_;
943#   my $top    = $y;
944#   $x--;
945#   my $bottom = $y + $self->font->height - 4;
946#   $gd->line($x,$top+2, $x,$bottom-2,$color);
947#   $gd->setPixel($x+1,  $top+1,$color);
948#   $gd->setPixel($x+$_, $top,$color) for (2..3);
949#   $gd->setPixel($x-1,  $top+1,$color);
950#   $gd->setPixel($x-$_, $top,$color) for (2..3);
951
952#   $gd->setPixel($x+1,  $bottom-1,$color);
953#   $gd->setPixel($x+$_, $bottom,  $color) for (2..3);
954#   $gd->setPixel($x-1,  $bottom-1,$color);
955#   $gd->setPixel($x-$_, $bottom,  $color) for (2..3);
956# }
957
958# don't like that -- try drawing carets
959sub _draw_insertion_point {
960   my $self = shift;
961   my ($gd,$left,$right,$top,$bottom,$color) = @_;
962
963   my $poly = GD::Polygon->new();
964   $poly->addPt($left-3,$top+1);
965   $poly->addPt($right+2,$top+1);
966   $poly->addPt(($left+$right)/2-1,$top+3);
967   $gd->filledPolygon($poly,$color);
968
969   $poly = GD::Polygon->new();
970   $poly->addPt($left-3,$bottom);
971   $poly->addPt($right+2,$bottom);
972   $poly->addPt(($left+$right)/2-1,$bottom-2);
973   $gd->filledPolygon($poly,$color);
974}
975
9761;
977
978__END__
979
980=head1 NAME
981
982Bio::Graphics::Glyph::segments - The "segments" glyph
983
984=head1 SYNOPSIS
985
986  See L<Bio::Graphics::Panel> and L<Bio::Graphics::Glyph>.
987
988=head1 DESCRIPTION
989
990This glyph is used for drawing features that consist of discontinuous
991segments.  Unlike "graded_segments" or "alignment", the segments are a
992uniform color and not dependent on the score of the segment.
993
994=head2 METHODS
995
996This module overrides the maxdepth() method to return 1 unless
997explicitly specified by the -maxdepth option. This means that modules
998inheriting from segments will only be presented with one level of
999subfeatures. Override the maxdepth() method to get more levels.
1000
1001=head2 OPTIONS
1002
1003The following options are standard among all Glyphs.  See
1004L<Bio::Graphics::Glyph> for a full explanation.
1005
1006  Option      Description                      Default
1007  ------      -----------                      -------
1008
1009  -fgcolor      Foreground color	       black
1010
1011  -outlinecolor	Synonym for -fgcolor
1012
1013  -bgcolor      Background color               turquoise
1014
1015  -fillcolor    Synonym for -bgcolor
1016
1017  -linewidth    Line width                     1
1018
1019  -height       Height of glyph		       10
1020
1021  -font         Glyph font		       gdSmallFont
1022
1023  -connector    Connector type                 0 (false)
1024
1025  -connector_color
1026                Connector color                black
1027
1028  -label        Whether to draw a label	       0 (false)
1029
1030  -description  Whether to draw a description  0 (false)
1031
1032  -strand_arrow Whether to indicate            0 (false)
1033                 strandedness
1034
1035  -hilite       Highlight color                undef (no color)
1036
1037In addition, the following glyph-specific options are recognized:
1038
1039  -draw_dna     If true, draw the dna residues        0 (false)
1040                 when magnification level
1041                 allows.
1042
1043  -draw_target  If true, draw the dna residues        0 (false)
1044                 of the TARGET sequence when
1045                 magnification level allows.
1046                 See "Displaying Alignments".
1047
1048  -draw_protein_target  If true, draw the protein residues        0 (false)
1049                 of the TARGET sequence when
1050                 magnification level allows.
1051                 See "Displaying Alignments".
1052
1053  -ragged_extra When combined with -draw_target,      0 (false)
1054                draw extra bases beyond the end
1055                of the alignment. The value is
1056                the maximum number of extra
1057                bases.
1058                See "Displaying Alignments".
1059
1060  -ragged_start  Deprecated option.  Use
1061                 -ragged_extra instead
1062
1063  -show_mismatch When combined with -draw_target,     0 (false)
1064                 highlights mismatched bases in
1065                 the mismatch color.
1066                 Can be 0 (don't display);
1067                 1 (display when the DNA fits);
1068                 or another positive integer
1069                 (display when the region in
1070                 view is <= this value).
1071                 See "Displaying Alignments".
1072
1073  -mismatch_only When combined with -draw_target,     0 (false)
1074                 draws only the mismatched bases
1075                 in the alignment. Implies
1076                 -show_mismatch.
1077                 See "Displaying Alignments".
1078
1079  -mismatch_color The mismatch color to use           'lightgrey'
1080
1081  -insertion_color The color to use for insertions    'green'
1082                   relative to the reference.
1083
1084  -deletion_color The color to use for deletions      'red'
1085                  relative to the reference.
1086
1087  -indel_color   The color to use for indels, used   'lightgrey'
1088                 only if -insertion_color or
1089                 -deletion_color are absent
1090
1091  -true_target   Show the target DNA in its native    0 (false)
1092                 (plus strand) orientation, even if
1093                 the alignment is to the minus strand.
1094                 See "Displaying Alignments".
1095
1096  -realign       Attempt to realign sequences at      0 (false)
1097                 high mag to account for indels.
1098                 See "Displaying Alignments".
1099
1100If the -draw_dna flag is set to a true value, then when the
1101magnification is high enough, the underlying DNA sequence will be
1102shown.  This option is mutually exclusive with -draw_target. See
1103Bio::Graphics::Glyph::generic for more details.
1104
1105The -draw_target, -ragged_extra, and -show_mismatch options only work
1106with seqfeatures that implement the hit() method
1107(Bio::SeqFeature::SimilarityPair). -draw_target will cause the DNA of
1108the hit sequence to be displayed when the magnification is high enough
1109to allow individual bases to be drawn. The -ragged_extra option will
1110cause the alignment to be extended at the extreme ends by the
1111indicated number of bases, and is useful for looking for polyAs and
1112cloning sites at the ends of ESTs and cDNAs. -show_mismatch will cause
1113mismatched bases to be highlighted in with the color indicated by
1114-mismatch_color. A -show_mismatch value of "1" will highlight mismatches
1115only when the base pairs are displayed. A positive integer will cause
1116mismatches to be shown whenever the region in view is less than or equal
1117to the requested value.
1118
1119At high magnifications, minus strand matches will automatically be
1120shown as their reverse complement (so that the match has the same
1121sequence as the plus strand of the source dna).  If you prefer to see
1122the actual sequence of the target as it appears on the minus strand,
1123then set -true_target to true.
1124
1125Note that -true_target has the opposite meaning from
1126-canonical_strand, which is used in conjunction with -draw_dna to draw
1127minus strand features as if they appear on the plus strand.
1128
1129=head2 Displaying Alignments
1130
1131When the B<-draw_target> option is true, this glyph can be used to
1132display nucleotide alignments such as BLAST, FASTA or BLAT
1133similarities.  At high magnification, this glyph will attempt to show
1134how the sequence of the source (query) DNA matches the sequence of the
1135target (the hit).  For this to work, the feature must implement the
1136hit() method, and both the source and the target DNA must be
1137available.  If you pass the glyph a series of
1138Bio::SeqFeature::SimilarityPair objects, then these criteria will be
1139satisified.
1140
1141Without additional help, this glyph cannot display gapped alignments
1142correctly.  To display gapped alignments, you can use the
1143Bio::Graphics::Brower::Realign module, which is part of the Generic
1144Genome Browser package (http://www.gmod.org).  If you wish to install
1145the Realign module and not the rest of the package, here is the
1146recipe:
1147
1148  cd Generic-Genome-Browser-1.XX
1149  perl Makefile.PL DO_XS=1
1150  make
1151  make install_site
1152
1153If possible, build the gbrowse package with the DO_XS=1 option.  This
1154compiles a C-based DP algorithm that both gbrowse and gbrowse_details
1155will use if they can.  If DO_XS is not set, then the scripts will use
1156a Perl-based version of the algorithm that is 10-100 times slower.
1157
1158The display of alignments can be tweaked using the -ragged_extra,
1159-show_mismatch, -true_target, and -realign options.  See the options
1160section for further details.
1161
1162There is also a B<-draw_protein_target> option, which is designed for
1163protein to nucleotide alignments. It draws the target sequence every
1164third base pair and is supposed to align correctly with the forward
1165and reverse translation glyphs. This option is experimental at the
1166moment, and may not work correctly, to use with care.
1167
1168=head1 BUGS
1169
1170Please report them.
1171
1172=head1 SEE ALSO
1173
1174
1175L<Bio::Graphics::Panel>,
1176L<Bio::Graphics::Glyph>,
1177L<Bio::Graphics::Glyph::arrow>,
1178L<Bio::Graphics::Glyph::cds>,
1179L<Bio::Graphics::Glyph::crossbox>,
1180L<Bio::Graphics::Glyph::diamond>,
1181L<Bio::Graphics::Glyph::dna>,
1182L<Bio::Graphics::Glyph::dot>,
1183L<Bio::Graphics::Glyph::ellipse>,
1184L<Bio::Graphics::Glyph::extending_arrow>,
1185L<Bio::Graphics::Glyph::generic>,
1186L<Bio::Graphics::Glyph::graded_segments>,
1187L<Bio::Graphics::Glyph::heterogeneous_segments>,
1188L<Bio::Graphics::Glyph::line>,
1189L<Bio::Graphics::Glyph::pinsertion>,
1190L<Bio::Graphics::Glyph::primers>,
1191L<Bio::Graphics::Glyph::rndrect>,
1192L<Bio::Graphics::Glyph::segments>,
1193L<Bio::Graphics::Glyph::ruler_arrow>,
1194L<Bio::Graphics::Glyph::toomany>,
1195L<Bio::Graphics::Glyph::transcript>,
1196L<Bio::Graphics::Glyph::transcript2>,
1197L<Bio::Graphics::Glyph::translation>,
1198L<Bio::Graphics::Glyph::triangle>,
1199L<Bio::DB::GFF>,
1200L<Bio::SeqI>,
1201L<Bio::SeqFeatureI>,
1202L<Bio::Das>,
1203L<GD>
1204
1205=head1 AUTHOR
1206
1207Lincoln Stein E<lt>lstein@cshl.orgE<gt>
1208
1209Copyright (c) 2001 Cold Spring Harbor Laboratory
1210
1211This library is free software; you can redistribute it and/or modify
1212it under the same terms as Perl itself.  See DISCLAIMER.txt for
1213disclaimers of warranty.
1214
1215=cut
1216