1package ExtUtils::XSSymSet;
2
3use strict;
4use Config;
5our $VERSION = '1.4';
6
7
# new([$maxlen[,$silent]])
#
# Construct an empty symbol set.  May be invoked as a class method or on an
# existing object (in which case the new set is blessed into that object's
# class).  $maxlen (default 31) and $silent (default 0) are stored as the
# defaults used by later addsym()/trimsym() calls made through the object.
# Returns the new blessed hash reference.
sub new {
  my($pkg,$maxlen,$silent) = @_;

  # bless() refuses a reference as the class name, so resolve an invocant
  # object to the name of its class first.
  my $class = ref($pkg) || $pkg;

  $maxlen ||= 31;
  # Allow absurdly long symbols here if we've told the compiler to
  # do the shortening for us.
  $maxlen = 2048 if $Config{'useshortenedsymbols'};
  $silent ||= 0;

  # The odd punctuation in the bookkeeping keys keeps them from colliding
  # with ordinary identifier-like symbol names stored in the same hash.
  my $obj = { '__M@xLen' => $maxlen, '__S!lent' => $silent };
  return bless $obj, $class;
}
18
19
# trimsym($name[,$maxlen[,$silent]])
#
# Return a version of $name that is at most $maxlen characters long.
# May be called as a class method or on an object; when $maxlen or $silent
# is undefined and the invocant is an object, the object's stored default is
# used, falling back to 31 and 0 respectively.  If
# $Config{'useshortenedsymbols'} is set, $maxlen is forced to 2048.
#
# Names that already fit are returned untouched.  Otherwise the name is
# shortened in progressively more aggressive stages, stopping as soon as it
# fits:
#   1. collapse doubled underscores;
#   2. collapse every run of a repeated character;
#   3. keep one of every three non-underscore characters, first sparing
#      upper-case letters and then not (the text after the last underscore
#      is left alone when it is 12 characters or fewer);
#   4. as a last resort, keep $maxlen characters sampled evenly from the
#      name produced by stage 2.
# Unless $silent is true, a warning is emitted whenever the name is altered.
sub trimsym {
  my($self,$name,$maxlen,$silent) = @_;

  unless (defined $maxlen) {
    if (ref $self) { $maxlen ||= $self->{'__M@xLen'}; }
    $maxlen ||= 31;
  }
  # The compiler does the shortening itself; make our limit a no-op.
  $maxlen = 2048 if $Config{'useshortenedsymbols'};

  unless (defined $silent) {
    if (ref $self) { $silent ||= $self->{'__S!lent'}; }
    $silent ||= 0;
  }
  return $name if (length($name) <= $maxlen);

  my $trimmed = $name;

  # Stage 1: just try to remove duplicated delimiters.
  $trimmed =~ s/__/_/g;

  # Stage 2: all duplicated chars.
  $trimmed =~ s/(.)\1+/$1/g if length($trimmed) > $maxlen;

  if (length($trimmed) > $maxlen) {
    # Stage 3: split into optional "XS_" marker, package prefix and the
    # function name following the last underscore.
    my($xs,$prefix,$func) = $trimmed =~ /^(XS_)?(.*)_([^_]*)$/;
    my $has_delim = defined $func;   # false when the name has no underscore
    $xs ||= '';

    # Keep one character out of every three.  The first pattern refuses to
    # swallow upper-case letters; the second swallows anything but "_".
    my $spare_upper = qr/([^_])[^A-Z_]{2}/;
    my $spare_none  = qr/([^_])[^_]{2}/;

    my $squeezed;
    if ($has_delim and length($func) <= 12) {
      # Try to preserve short function names: squeeze only the prefix.
      $prefix =~ s/$spare_upper/$1/g;
      $squeezed = "$xs$prefix" . "_$func";
      if (length($squeezed) > $maxlen) {
        $prefix =~ s/$spare_none/$1/g;
        $squeezed = "$xs$prefix" . "_$func";
      }
    }
    else {
      # Long function name, or no underscore at all: squeeze everything
      # after the XS_ marker.  Without an underscore there is no
      # prefix/function split, so the whole name is squeezed as one piece
      # instead of being discarded.
      $squeezed = $has_delim ? "$prefix$func" : $trimmed;
      $squeezed =~ s/$spare_upper/$1/g;
      if (length("$xs$squeezed") > $maxlen) {
        $squeezed =~ s/$spare_none/$1/g;
      }
      $squeezed = "$xs$squeezed";
    }

    if (length($squeezed) <= $maxlen) { $trimmed = $squeezed; }
    else {
      # Stage 4: periodically drop characters of any type.  Sample $keep
      # evenly spaced characters (always including the first) so that the
      # result is guaranteed to fit.  A non-positive limit yields ''.
      my $len  = length($trimmed);
      my $keep = int($maxlen);
      $keep = 0 if $keep < 0;
      $trimmed = join '',
        map { substr($trimmed, int($_ * $len / $keep), 1) } 0 .. $keep - 1;
    }
  }

  warn "Warning: long symbol $name\n\ttrimmed to $trimmed\n\t" unless $silent;
  return $trimmed;
}
78
79
# addsym($name[,$maxlen[,$silent]])
#
# Add $sym to the set and return the (possibly shortened) name chosen for
# it.  A symbol that is already in the set always yields the name it was
# given the first time.  If the shortened name is already taken, the symbol
# is shortened by a further three characters and a two-digit "_NN" suffix is
# appended to make it unique.  $maxlen and $silent default to the values
# stored in the object (then to 31 and 0).  Must be called on an object.
sub addsym {
  my($self,$sym,$maxlen,$silent) = @_;

  my $known = $self->get_trimmed($sym);
  return $known if defined $known;

  $maxlen = ($self->{'__M@xLen'} || 31) unless $maxlen;
  $silent = ($self->{'__S!lent'} || 0)  unless $silent;

  # Trim quietly first; which warning (if any) applies is decided below.
  my $short = $self->trimsym($sym,$maxlen,1);

  if (not exists $self->{$short}) {
    if ($short ne $sym and not $silent) {
      warn "Warning: long symbol $sym\n\ttrimmed to $short\n\t";
    }
  }
  else {
    # Name collision: leave room for the "_NN" suffix and pick the first
    # sequence number that is still free.
    $short = $self->trimsym($sym,$maxlen-3,$silent);
    my($seq) = "00";
    $seq++ while exists $self->{"${short}_$seq"};
    warn "Warning: duplicate symbol $short\n\tchanged to ${short}_$seq\n\t(original was $sym)\n\t"
      unless $silent;
    $short .= "_$seq";
  }

  # Record the mapping in both directions.
  $self->{$short} = $sym;
  $self->{'__N+Map'}->{$sym} = $short;
  return $short;
}
104
105
# delsym($name)
#
# Remove the original symbol $sym from the set.  Returns the trimmed name it
# had been given, or undef if $sym was not in the set.
sub delsym {
  my($self,$sym) = @_;

  my $short = $self->{'__N+Map'}->{$sym};
  return $short unless defined $short;

  # Drop both directions of the mapping.
  delete $self->{'__N+Map'}->{$sym};
  delete $self->{$short};
  return $short;
}
115
116
# get_trimmed($name)
#
# Look up the trimmed name recorded for the original symbol $sym.
# Returns undef when $sym is not in the set.
sub get_trimmed {
  my($self,$sym) = @_;
  return $self->{'__N+Map'}{$sym};
}
121
122
# get_orig($trimmed)
#
# Look up the original symbol that was shortened to $trimmed.
# Returns undef when $trimmed is not a key of this set.
sub get_orig {
  my($self,$trimmed) = @_;
  return $self->{$trimmed};
}
127
128
# all_orig()
#
# Return the original (untrimmed) symbol names held in this set.
sub all_orig {
  my($self) = @_;
  return keys %{ $self->{'__N+Map'} };
}
# all_trimmed()
#
# Return the trimmed symbol names held in this set.  Trimmed names live in
# the object hash next to three bookkeeping entries, so every key except
# those three is returned.  Names are not required to be identifier-like;
# a name such as "SYS$FOO" is reported too.
sub all_trimmed {
  my($self) = @_;
  my %internal = map { $_ => 1 } ('__M@xLen', '__S!lent', '__N+Map');
  return grep { !$internal{$_} } keys %{$self};
}
131
132__END__
133
134=head1 NAME
135
136ExtUtils::XSSymSet - keep sets of symbol names palatable to the VMS linker
137
138=head1 SYNOPSIS
139
140  use ExtUtils::XSSymSet;
141
  $set = ExtUtils::XSSymSet->new;
  while ($sym = make_symbol()) { $set->addsym($sym); }
  foreach $safesym ($set->all_trimmed) {
    print "Processing $safesym (derived from ",
        $set->get_orig($safesym), ")\n";
    do_stuff($safesym);
  }
149
150  $safesym = ExtUtils::XSSymSet->trimsym($onesym);
151
152=head1 DESCRIPTION
153
154Since the VMS linker distinguishes symbols based only on the first 31
155characters of their names, it is occasionally necessary to shorten
156symbol names in order to avoid collisions.  (This is especially true of
157names generated by xsubpp, since prefixes generated by nested package
158names can become quite long.)  C<ExtUtils::XSSymSet> provides functions to
159shorten names in a consistent fashion, and to track a set of names to
ensure that each is unique.  While designed with F<xsubpp> in mind, it
161may be used with any set of strings.
162
163This package supplies the following functions, all of which should be
164called as methods.
165
166=over 4
167
168=item new([$maxlen[,$silent]])
169
Creates an empty C<ExtUtils::XSSymSet> set of symbols.  This function may be
171called as a static method or via an existing object.  If C<$maxlen> or
172C<$silent> are specified, they are used as the defaults for maximum
173name length and warning behavior in future calls to addsym() or
174trimsym() via this object.  If the compiler has been instructed to do its
175own symbol shortening via C<$Config{'useshortenedsymbols'}>, a value of
1762048 is assumed for C<$maxlen> as a way of bypassing the shortening done by
177this module.
178
179=item addsym($name[,$maxlen[,$silent]])
180
181Creates a symbol name from C<$name>, using the methods described
182under trimsym(), which is unique in this set of symbols, and returns
183the new name.  C<$name> and its resultant are added to the set, and
184any future calls to addsym() specifying the same C<$name> will return
185the same result, regardless of the value of C<$maxlen> specified.
186Unless C<$silent> is true, warnings are output if C<$name> had to be
187trimmed or changed in order to avoid collision with an existing symbol
188name.  C<$maxlen> and C<$silent> default to the values specified when
189this set of symbols was created.  This method must be called via an
190existing object.
191
192=item trimsym($name[,$maxlen[,$silent]])
193
194Creates a symbol name C<$maxlen> or fewer characters long from
195C<$name> and returns it. If C<$name> is too long, it first tries to
196shorten it by removing duplicate characters, then by periodically
197removing non-underscore characters, and finally, if necessary, by
198periodically removing characters of any type.  C<$maxlen> defaults
199to 31.  Unless C<$silent> is true, a warning is output if C<$name>
200is altered in any way.  This function may be called either as a
201static method or via an existing object, but in the latter case no
check is made to ensure that the resulting name is unique in the
set of symbols.  If the compiler has been instructed to do its
204own symbol shortening via C<$Config{'useshortenedsymbols'}>, a value
205of 2048 is assumed for C<$maxlen> as a way of bypassing the shortening
206done by this module.
207
208=item delsym($name)
209
210Removes C<$name> from the set of symbols, where C<$name> is the
211original symbol name passed previously to addsym().  If C<$name>
212existed in the set of symbols, returns its "trimmed" equivalent,
213otherwise returns C<undef>.  This method must be called via an
214existing object.
215
216=item get_orig($trimmed)
217
218Returns the original name which was trimmed to C<$trimmed> by a
219previous call to addsym(), or C<undef> if C<$trimmed> does not
220correspond to a member of this set of symbols.  This method must be
221called via an existing object.
222
223=item get_trimmed($name)
224
225Returns the trimmed name which was generated from C<$name> by a
226previous call to addsym(), or C<undef> if C<$name> is not a member
227of this set of symbols.  This method must be called via an
228existing object.
229
230=item all_orig()
231
232Returns a list containing all of the original symbol names
233from this set.
234
235=item all_trimmed()
236
237Returns a list containing all of the trimmed symbol names
238from this set.
239
240=back
241
242=head1 AUTHOR
243
244Charles Bailey  E<lt>I<bailey@newman.upenn.edu>E<gt>
245
246=head1 REVISION
247
248Last revised 8-Oct-2010, for Perl 5.13.6.
249
250