1package ExtUtils::Constant::Base; 2 3use strict; 4use vars qw($VERSION); 5use Carp; 6use Text::Wrap; 7use ExtUtils::Constant::Utils qw(C_stringify perl_stringify); 8$VERSION = '0.07'; 9 10use constant is_perl56 => ($] < 5.007 && $] > 5.005_50); 11 12 13=head1 NAME 14 15ExtUtils::Constant::Base - base class for ExtUtils::Constant objects 16 17=head1 SYNOPSIS 18 19 require ExtUtils::Constant::Base; 20 @ISA = 'ExtUtils::Constant::Base'; 21 22=head1 DESCRIPTION 23 24ExtUtils::Constant::Base provides a base implementation of methods to 25generate C code to give fast constant value lookup by named string. Currently 26it's mostly used ExtUtils::Constant::XS, which generates the lookup code 27for the constant() subroutine found in many XS modules. 28 29=head1 USAGE 30 31ExtUtils::Constant::Base exports no subroutines. The following methods are 32available 33 34=over 4 35 36=cut 37 38sub valid_type { 39 # Default to assuming that you don't need different types of return data. 40 1; 41} 42sub default_type { 43 ''; 44} 45 46=item header 47 48A method returning a scalar containing definitions needed, typically for a 49C header file. 50 51=cut 52 53sub header { 54 '' 55} 56 57# This might actually be a return statement. Note that you are responsible 58# for any space you might need before your value, as it lets to perform 59# "tricks" such as "return KEY_" and have strings appended. 60sub assignment_clause_for_type; 61# In which case this might be an empty string 62sub return_statement_for_type {undef}; 63sub return_statement_for_notdef; 64sub return_statement_for_notfound; 65 66# "#if 1" is true to a C pre-processor 67sub macro_from_name { 68 1; 69} 70 71sub macro_from_item { 72 1; 73} 74 75sub macro_to_ifdef { 76 my ($self, $macro) = @_; 77 if (ref $macro) { 78 return $macro->[0]; 79 } 80 if (defined $macro && $macro ne "" && $macro ne "1") { 81 return $macro ? "#ifdef $macro\n" : "#if 0\n"; 82 } 83 return ""; 84} 85 86sub macro_to_ifndef { 87 my ($self, $macro) = @_; 88 if (ref $macro) { 89 # Can't invert these stylishly, so "bodge it" 90 return "$macro->[0]#else\n"; 91 } 92 if (defined $macro && $macro ne "" && $macro ne "1") { 93 return $macro ? "#ifndef $macro\n" : "#if 1\n"; 94 } 95 croak "Can't generate an ifndef for unconditional code"; 96} 97 98sub macro_to_endif { 99 my ($self, $macro) = @_; 100 101 if (ref $macro) { 102 return $macro->[1]; 103 } 104 if (defined $macro && $macro ne "" && $macro ne "1") { 105 return "#endif\n"; 106 } 107 return ""; 108} 109 110sub name_param { 111 'name'; 112} 113 114# This is possibly buggy, in that it's not mandatory (below, in the main 115# C_constant parameters, but is expected to exist here, if it's needed) 116# Buggy because if you're definitely pure 8 bit only, and will never be 117# presented with your constants in utf8, the default form of C_constant can't 118# be told not to do the utf8 version. 119 120sub is_utf8_param { 121 'utf8'; 122} 123 124sub memEQ { 125 "!memcmp"; 126} 127 128=item memEQ_clause args_hashref 129 130A method to return a suitable C C<if> statement to check whether I<name> 131is equal to the C variable C<name>. If I<checked_at> is defined, then it 132is used to avoid C<memEQ> for short names, or to generate a comment to 133highlight the position of the character in the C<switch> statement. 134 135If i<checked_at> is a reference to a scalar, then instead it gives 136the characters pre-checked at the beginning, (and the number of chars by 137which the C variable name has been advanced. These need to be chopped from 138the front of I<name>). 139 140=cut 141 142sub memEQ_clause { 143# if (memEQ(name, "thingy", 6)) { 144 # Which could actually be a character comparison or even "" 145 my ($self, $args) = @_; 146 my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)}; 147 $indent = ' ' x ($indent || 4); 148 my $front_chop; 149 if (ref $checked_at) { 150 # regexp won't work on 5.6.1 without use utf8; in turn that won't work 151 # on 5.005_03. 152 substr ($name, 0, length $$checked_at,) = ''; 153 $front_chop = C_stringify ($$checked_at); 154 undef $checked_at; 155 } 156 my $len = length $name; 157 158 if ($len < 2) { 159 return $indent . "{\n" 160 if (defined $checked_at and $checked_at == 0) or $len == 0; 161 # We didn't switch, drop through to the code for the 2 character string 162 $checked_at = 1; 163 } 164 165 my $name_param = $self->name_param; 166 167 if ($len < 3 and defined $checked_at) { 168 my $check; 169 if ($checked_at == 1) { 170 $check = 0; 171 } elsif ($checked_at == 0) { 172 $check = 1; 173 } 174 if (defined $check) { 175 my $char = C_stringify (substr $name, $check, 1); 176 # Placate 5.005 with a break in the string. I can't see a good way of 177 # getting it to not take [ as introducing an array lookup, even with 178 # ${name_param}[$check] 179 return $indent . "if ($name_param" . "[$check] == '$char') {\n"; 180 } 181 } 182 if (($len == 2 and !defined $checked_at) 183 or ($len == 3 and defined ($checked_at) and $checked_at == 2)) { 184 my $char1 = C_stringify (substr $name, 0, 1); 185 my $char2 = C_stringify (substr $name, 1, 1); 186 return $indent . 187 "if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n"; 188 } 189 if (($len == 3 and defined ($checked_at) and $checked_at == 1)) { 190 my $char1 = C_stringify (substr $name, 0, 1); 191 my $char2 = C_stringify (substr $name, 2, 1); 192 return $indent . 193 "if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n"; 194 } 195 196 my $pointer = '^'; 197 my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1; 198 if ($have_checked_last) { 199 # Checked at the last character, so no need to memEQ it. 200 $pointer = C_stringify (chop $name); 201 $len--; 202 } 203 204 $name = C_stringify ($name); 205 my $memEQ = $self->memEQ(); 206 my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n"; 207 # Put a little ^ under the letter we checked at 208 # Screws up for non printable and non-7 bit stuff, but that's too hard to 209 # get right. 210 if (defined $checked_at) { 211 $body .= $indent . "/* " . (' ' x length $memEQ) 212 . (' ' x length $name_param) 213 . (' ' x $checked_at) . $pointer 214 . (' ' x ($len - $checked_at + length $len)) . " */\n"; 215 } elsif (defined $front_chop) { 216 $body .= $indent . "/* $front_chop" 217 . (' ' x ($len + 1 + length $len)) . " */\n"; 218 } 219 return $body; 220} 221 222=item dump_names arg_hashref, ITEM... 223 224An internal function to generate the embedded perl code that will regenerate 225the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the 226same as for C_constant. I<indent> is treated as number of spaces to indent 227by. If C<declare_types> is true a C<$types> is always declared in the perl 228code generated, if defined and false never declared, and if undefined C<$types> 229is only declared if the values in I<types> as passed in cannot be inferred from 230I<default_types> and the I<ITEM>s. 231 232=cut 233 234sub dump_names { 235 my ($self, $args, @items) = @_; 236 my ($default_type, $what, $indent, $declare_types) 237 = @{$args}{qw(default_type what indent declare_types)}; 238 $indent = ' ' x ($indent || 0); 239 240 my $result; 241 my (@simple, @complex, %used_types); 242 foreach (@items) { 243 my $type; 244 if (ref $_) { 245 $type = $_->{type} || $default_type; 246 if ($_->{utf8}) { 247 # For simplicity always skip the bytes case, and reconstitute this entry 248 # from its utf8 twin. 249 next if $_->{utf8} eq 'no'; 250 # Copy the hashref, as we don't want to mess with the caller's hashref. 251 $_ = {%$_}; 252 unless (is_perl56) { 253 utf8::decode ($_->{name}); 254 } else { 255 $_->{name} = pack 'U*', unpack 'U0U*', $_->{name}; 256 } 257 delete $_->{utf8}; 258 } 259 } else { 260 $_ = {name=>$_}; 261 $type = $default_type; 262 } 263 $used_types{$type}++; 264 if ($type eq $default_type 265 # grr 5.6.1 266 and length $_->{name} 267 and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//) 268 and !defined ($_->{macro}) and !defined ($_->{value}) 269 and !defined ($_->{default}) and !defined ($_->{pre}) 270 and !defined ($_->{post}) and !defined ($_->{def_pre}) 271 and !defined ($_->{def_post}) and !defined ($_->{weight})) { 272 # It's the default type, and the name consists only of A-Za-z0-9_ 273 push @simple, $_->{name}; 274 } else { 275 push @complex, $_; 276 } 277 } 278 279 if (!defined $declare_types) { 280 # Do they pass in any types we weren't already using? 281 foreach (keys %$what) { 282 next if $used_types{$_}; 283 $declare_types++; # Found one in $what that wasn't used. 284 last; # And one is enough to terminate this loop 285 } 286 } 287 if ($declare_types) { 288 $result = $indent . 'my $types = {map {($_, 1)} qw(' 289 . join (" ", sort keys %$what) . ")};\n"; 290 } 291 local $Text::Wrap::huge = 'overflow'; 292 local $Text::Wrap::columns = 80; 293 $result .= wrap ($indent . "my \@names = (qw(", 294 $indent . " ", join (" ", sort @simple) . ")"); 295 if (@complex) { 296 foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) { 297 my $name = perl_stringify $item->{name}; 298 my $line = ",\n$indent {name=>\"$name\""; 299 $line .= ", type=>\"$item->{type}\"" if defined $item->{type}; 300 foreach my $thing (qw (macro value default pre post def_pre def_post)) { 301 my $value = $item->{$thing}; 302 if (defined $value) { 303 if (ref $value) { 304 $line .= ", $thing=>[\"" 305 . join ('", "', map {perl_stringify $_} @$value) . '"]'; 306 } else { 307 $line .= ", $thing=>\"" . perl_stringify($value) . "\""; 308 } 309 } 310 } 311 $line .= "}"; 312 # Ensure that the enclosing C comment doesn't end 313 # by turning */ into *" . "/ 314 $line =~ s!\*\/!\*" . "/!gs; 315 # gcc -Wall doesn't like finding /* inside a comment 316 $line =~ s!\/\*!/" . "\*!gs; 317 $result .= $line; 318 } 319 } 320 $result .= ");\n"; 321 322 $result; 323} 324 325=item assign arg_hashref, VALUE... 326 327A method to return a suitable assignment clause. If I<type> is aggregate 328(eg I<PVN> expects both pointer and length) then there should be multiple 329I<VALUE>s for the components. I<pre> and I<post> if defined give snippets 330of C code to proceed and follow the assignment. I<pre> will be at the start 331of a block, so variables may be defined in it. 332 333=cut 334# Hmm. value undef to do NOTDEF? value () to do NOTFOUND? 335 336sub assign { 337 my $self = shift; 338 my $args = shift; 339 my ($indent, $type, $pre, $post, $item) 340 = @{$args}{qw(indent type pre post item)}; 341 $post ||= ''; 342 my $clause; 343 my $close; 344 if ($pre) { 345 chomp $pre; 346 $close = "$indent}\n"; 347 $clause = $indent . "{\n"; 348 $indent .= " "; 349 $clause .= "$indent$pre"; 350 $clause .= ";" unless $pre =~ /;$/; 351 $clause .= "\n"; 352 } 353 confess "undef \$type" unless defined $type; 354 confess "Can't generate code for type $type" 355 unless $self->valid_type($type); 356 357 $clause .= join '', map {"$indent$_\n"} 358 $self->assignment_clause_for_type({type=>$type,item=>$item}, @_); 359 chomp $post; 360 if (length $post) { 361 $clause .= "$post"; 362 $clause .= ";" unless $post =~ /;$/; 363 $clause .= "\n"; 364 } 365 my $return = $self->return_statement_for_type($type); 366 $clause .= "$indent$return\n" if defined $return; 367 $clause .= $close if $close; 368 return $clause; 369} 370 371=item return_clause arg_hashref, ITEM 372 373A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref 374(as passed to C<C_constant> and C<match_clause>. I<indent> is the number 375of spaces to indent, defaulting to 6. 376 377=cut 378 379sub return_clause { 380 381##ifdef thingy 382# *iv_return = thingy; 383# return PERL_constant_ISIV; 384##else 385# return PERL_constant_NOTDEF; 386##endif 387 my ($self, $args, $item) = @_; 388 my $indent = $args->{indent}; 389 390 my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type) 391 = @$item{qw (name value default pre post def_pre def_post type)}; 392 $value = $name unless defined $value; 393 my $macro = $self->macro_from_item($item); 394 $indent = ' ' x ($indent || 6); 395 unless (defined $type) { 396 # use Data::Dumper; print STDERR Dumper ($item); 397 confess "undef \$type"; 398 } 399 400 ##ifdef thingy 401 my $clause = $self->macro_to_ifdef($macro); 402 403 # *iv_return = thingy; 404 # return PERL_constant_ISIV; 405 $clause 406 .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post, 407 item=>$item}, ref $value ? @$value : $value); 408 409 if (defined $macro && $macro ne "" && $macro ne "1") { 410 ##else 411 $clause .= "#else\n"; 412 413 # return PERL_constant_NOTDEF; 414 if (!defined $default) { 415 my $notdef = $self->return_statement_for_notdef(); 416 $clause .= "$indent$notdef\n" if defined $notdef; 417 } else { 418 my @default = ref $default ? @$default : $default; 419 $type = shift @default; 420 $clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, 421 post=>$post, item=>$item}, @default); 422 } 423 } 424 ##endif 425 $clause .= $self->macro_to_endif($macro); 426 427 return $clause; 428} 429 430sub match_clause { 431 # $offset defined if we have checked an offset. 432 my ($self, $args, $item) = @_; 433 my ($offset, $indent) = @{$args}{qw(checked_at indent)}; 434 $indent = ' ' x ($indent || 4); 435 my $body = ''; 436 my ($no, $yes, $either, $name, $inner_indent); 437 if (ref $item eq 'ARRAY') { 438 ($yes, $no) = @$item; 439 $either = $yes || $no; 440 confess "$item is $either expecting hashref in [0] || [1]" 441 unless ref $either eq 'HASH'; 442 $name = $either->{name}; 443 } else { 444 confess "$item->{name} has utf8 flag '$item->{utf8}', should be false" 445 if $item->{utf8}; 446 $name = $item->{name}; 447 $inner_indent = $indent; 448 } 449 450 $body .= $self->memEQ_clause ({name => $name, checked_at => $offset, 451 indent => length $indent}); 452 # If we've been presented with an arrayref for $item, then the user string 453 # contains in the range 128-255, and we need to check whether it was utf8 454 # (or not). 455 # In the worst case we have two named constants, where one's name happens 456 # encoded in UTF8 happens to be the same byte sequence as the second's 457 # encoded in (say) ISO-8859-1. 458 # In this case, $yes and $no both have item hashrefs. 459 if ($yes) { 460 $body .= $indent . " if (" . $self->is_utf8_param . ") {\n"; 461 } elsif ($no) { 462 $body .= $indent . " if (!" . $self->is_utf8_param . ") {\n"; 463 } 464 if ($either) { 465 $body .= $self->return_clause ({indent=>4 + length $indent}, $either); 466 if ($yes and $no) { 467 $body .= $indent . " } else {\n"; 468 $body .= $self->return_clause ({indent=>4 + length $indent}, $no); 469 } 470 $body .= $indent . " }\n"; 471 } else { 472 $body .= $self->return_clause ({indent=>2 + length $indent}, $item); 473 } 474 $body .= $indent . "}\n"; 475} 476 477 478=item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM... 479 480An internal method to generate a suitable C<switch> clause, called by 481C<C_constant> I<ITEM>s are in the hash ref format as given in the description 482of C<C_constant>, and must all have the names of the same length, given by 483I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being 484the hashrefs in the I<ITEM> list. (No parameters are modified, and there can 485be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without 486causing problems - the hash is passed in to save generating it afresh for 487each call). 488 489=cut 490 491sub switch_clause { 492 my ($self, $args, $namelen, $items, @items) = @_; 493 my ($indent, $comment) = @{$args}{qw(indent comment)}; 494 $indent = ' ' x ($indent || 2); 495 496 local $Text::Wrap::huge = 'overflow'; 497 local $Text::Wrap::columns = 80; 498 499 my @names = sort map {$_->{name}} @items; 500 my $leader = $indent . '/* '; 501 my $follower = ' ' x length $leader; 502 my $body = $indent . "/* Names all of length $namelen. */\n"; 503 if (defined $comment) { 504 $body = wrap ($leader, $follower, $comment) . "\n"; 505 $leader = $follower; 506 } 507 my @safe_names = @names; 508 foreach (@safe_names) { 509 confess sprintf "Name '$_' is length %d, not $namelen", length 510 unless length == $namelen; 511 # Argh. 5.6.1 512 # next unless tr/A-Za-z0-9_//c; 513 next if tr/A-Za-z0-9_// == length; 514 $_ = '"' . perl_stringify ($_) . '"'; 515 # Ensure that the enclosing C comment doesn't end 516 # by turning */ into *" . "/ 517 s!\*\/!\*"."/!gs; 518 # gcc -Wall doesn't like finding /* inside a comment 519 s!\/\*!/"."\*!gs; 520 } 521 $body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n"; 522 # Figure out what to switch on. 523 # (RMS, Spread of jump table, Position, Hashref) 524 my @best = (1e38, ~0); 525 # Prefer the last character over the others. (As it lets us shorten the 526 # memEQ clause at no cost). 527 foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) { 528 my ($min, $max) = (~0, 0); 529 my %spread; 530 if (is_perl56) { 531 # Need proper Unicode preserving hash keys for bytes in range 128-255 532 # here too, for some reason. grr 5.6.1 yet again. 533 tie %spread, 'ExtUtils::Constant::Aaargh56Hash'; 534 } 535 foreach (@names) { 536 my $char = substr $_, $i, 1; 537 my $ord = ord $char; 538 confess "char $ord is out of range" if $ord > 255; 539 $max = $ord if $ord > $max; 540 $min = $ord if $ord < $min; 541 push @{$spread{$char}}, $_; 542 # warn "$_ $char"; 543 } 544 # I'm going to pick the character to split on that minimises the root 545 # mean square of the number of names in each case. Normally this should 546 # be the one with the most keys, but it may pick a 7 where the 8 has 547 # one long linear search. I'm not sure if RMS or just sum of squares is 548 # actually better. 549 # $max and $min are for the tie-breaker if the root mean squares match. 550 # Assuming that the compiler may be building a jump table for the 551 # switch() then try to minimise the size of that jump table. 552 # Finally use < not <= so that if it still ties the earliest part of 553 # the string wins. Because if that passes but the memEQ fails, it may 554 # only need the start of the string to bin the choice. 555 # I think. But I'm micro-optimising. :-) 556 # OK. Trump that. Now favour the last character of the string, before the 557 # rest. 558 my $ss; 559 $ss += @$_ * @$_ foreach values %spread; 560 my $rms = sqrt ($ss / keys %spread); 561 if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) { 562 @best = ($rms, $max - $min, $i, \%spread); 563 } 564 } 565 confess "Internal error. Failed to pick a switch point for @names" 566 unless defined $best[2]; 567 # use Data::Dumper; print Dumper (@best); 568 my ($offset, $best) = @best[2,3]; 569 $body .= $indent . "/* Offset $offset gives the best switch position. */\n"; 570 571 my $do_front_chop = $offset == 0 && $namelen > 2; 572 if ($do_front_chop) { 573 $body .= $indent . "switch (*" . $self->name_param() . "++) {\n"; 574 } else { 575 $body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n"; 576 } 577 foreach my $char (sort keys %$best) { 578 confess sprintf "'$char' is %d bytes long, not 1", length $char 579 if length ($char) != 1; 580 confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255; 581 $body .= $indent . "case '" . C_stringify ($char) . "':\n"; 582 foreach my $thisone (sort { 583 # Deal with the case of an item actually being an array ref to 1 or 2 584 # hashrefs. Don't assign to $a or $b, as they're aliases to the 585 # original 586 my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a; 587 my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b; 588 # Sort by weight first 589 ($r->{weight} || 0) <=> ($l->{weight} || 0) 590 # Sort equal weights by name 591 or $l->{name} cmp $r->{name}} 592 # If this looks evil, maybe it is. $items is a 593 # hashref, and we're doing a hash slice on it 594 @{$items}{@{$best->{$char}}}) { 595 # warn "You are here"; 596 if ($do_front_chop) { 597 $body .= $self->match_clause ({indent => 2 + length $indent, 598 checked_at => \$char}, $thisone); 599 } else { 600 $body .= $self->match_clause ({indent => 2 + length $indent, 601 checked_at => $offset}, $thisone); 602 } 603 } 604 $body .= $indent . " break;\n"; 605 } 606 $body .= $indent . "}\n"; 607 return $body; 608} 609 610sub C_constant_return_type { 611 "static int"; 612} 613 614sub C_constant_prefix_param { 615 ''; 616} 617 618sub C_constant_prefix_param_defintion { 619 ''; 620} 621 622sub name_param_definition { 623 "const char *" . $_[0]->name_param; 624} 625 626sub namelen_param { 627 'len'; 628} 629 630sub namelen_param_definition { 631 'size_t ' . $_[0]->namelen_param; 632} 633 634sub C_constant_other_params { 635 ''; 636} 637 638sub C_constant_other_params_defintion { 639 ''; 640} 641 642=item params WHAT 643 644An "internal" method, subject to change, currently called to allow an 645overriding class to cache information that will then be passed into all 646the C<*param*> calls. (Yes, having to read the source to make sense of this is 647considered a known bug). I<WHAT> is be a hashref of types the constant 648function will return. In ExtUtils::Constant::XS this method is used to 649returns a hashref keyed IV NV PV SV to show which combination of pointers will 650be needed in the C argument list generated by 651C_constant_other_params_definition and C_constant_other_params 652 653=cut 654 655sub params { 656 ''; 657} 658 659 660=item dogfood arg_hashref, ITEM... 661 662An internal function to generate the embedded perl code that will regenerate 663the constant subroutines. Parameters are the same as for C_constant. 664 665Currently the base class does nothing and returns an empty string. 666 667=cut 668 669sub dogfood { 670 '' 671} 672 673=item normalise_items args, default_type, seen_types, seen_items, ITEM... 674 675Convert the items to a normalised form. For 8 bit and Unicode values converts 676the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded. 677 678=cut 679 680sub normalise_items 681{ 682 my $self = shift; 683 my $args = shift; 684 my $default_type = shift; 685 my $what = shift; 686 my $items = shift; 687 my @new_items; 688 foreach my $orig (@_) { 689 my ($name, $item); 690 if (ref $orig) { 691 # Make a copy which is a normalised version of the ref passed in. 692 $name = $orig->{name}; 693 my ($type, $macro, $value) = @$orig{qw (type macro value)}; 694 $type ||= $default_type; 695 $what->{$type} = 1; 696 $item = {name=>$name, type=>$type}; 697 698 undef $macro if defined $macro and $macro eq $name; 699 $item->{macro} = $macro if defined $macro; 700 undef $value if defined $value and $value eq $name; 701 $item->{value} = $value if defined $value; 702 foreach my $key (qw(default pre post def_pre def_post weight 703 not_constant)) { 704 my $value = $orig->{$key}; 705 $item->{$key} = $value if defined $value; 706 # warn "$key $value"; 707 } 708 } else { 709 $name = $orig; 710 $item = {name=>$name, type=>$default_type}; 711 $what->{$default_type} = 1; 712 } 713 warn +(ref ($self) || $self) 714 . "doesn't know how to handle values of type $_ used in macro $name" 715 unless $self->valid_type ($item->{type}); 716 # tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c 717 # doesn't work. Upgrade to 5.8 718 # if ($name !~ tr/\0-\177//c || $] < 5.005_50) { 719 if ($name !~ /[[:^ascii:]]/ || $] < 5.005_50 720 || $args->{disable_utf8_duplication}) { 721 # No characters outside 7 bit ASCII. 722 if (exists $items->{$name}) { 723 die "Multiple definitions for macro $name"; 724 } 725 $items->{$name} = $item; 726 } else { 727 # No characters outside 8 bit. This is hardest. 728 if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') { 729 confess "Unexpected ASCII definition for macro $name"; 730 } 731 # Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/; 732 # if ($name !~ tr/\0-\377//c) { 733 if ($name =~ tr/\0-\377// == length $name) { 734# if ($] < 5.007) { 735# $name = pack "C*", unpack "U*", $name; 736# } 737 $item->{utf8} = 'no'; 738 $items->{$name}[1] = $item; 739 push @new_items, $item; 740 # Copy item, to create the utf8 variant. 741 $item = {%$item}; 742 } 743 # Encode the name as utf8 bytes. 744 unless (is_perl56) { 745 utf8::encode($name); 746 } else { 747# warn "Was >$name< " . length ${name}; 748 $name = pack 'C*', unpack 'C*', $name . pack 'U*'; 749# warn "Now '${name}' " . length ${name}; 750 } 751 if ($items->{$name}[0]) { 752 die "Multiple definitions for macro $name"; 753 } 754 $item->{utf8} = 'yes'; 755 $item->{name} = $name; 756 $items->{$name}[0] = $item; 757 # We have need for the utf8 flag. 758 $what->{''} = 1; 759 } 760 push @new_items, $item; 761 } 762 @new_items; 763} 764 765=item C_constant arg_hashref, ITEM... 766 767A function that returns a B<list> of C subroutine definitions that return 768the value and type of constants when passed the name by the XS wrapper. 769I<ITEM...> gives a list of constant names. Each can either be a string, 770which is taken as a C macro name, or a reference to a hash with the following 771keys 772 773=over 8 774 775=item name 776 777The name of the constant, as seen by the perl code. 778 779=item type 780 781The type of the constant (I<IV>, I<NV> etc) 782 783=item value 784 785A C expression for the value of the constant, or a list of C expressions if 786the type is aggregate. This defaults to the I<name> if not given. 787 788=item macro 789 790The C pre-processor macro to use in the C<#ifdef>. This defaults to the 791I<name>, and is mainly used if I<value> is an C<enum>. If a reference an 792array is passed then the first element is used in place of the C<#ifdef> 793line, and the second element in place of the C<#endif>. This allows 794pre-processor constructions such as 795 796 #if defined (foo) 797 #if !defined (bar) 798 ... 799 #endif 800 #endif 801 802to be used to determine if a constant is to be defined. 803 804A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif> 805test is omitted. 806 807=item default 808 809Default value to use (instead of C<croak>ing with "your vendor has not 810defined...") to return if the macro isn't defined. Specify a reference to 811an array with type followed by value(s). 812 813=item pre 814 815C code to use before the assignment of the value of the constant. This allows 816you to use temporary variables to extract a value from part of a C<struct> 817and return this as I<value>. This C code is places at the start of a block, 818so you can declare variables in it. 819 820=item post 821 822C code to place between the assignment of value (to a temporary) and the 823return from the function. This allows you to clear up anything in I<pre>. 824Rarely needed. 825 826=item def_pre 827 828=item def_post 829 830Equivalents of I<pre> and I<post> for the default value. 831 832=item utf8 833 834Generated internally. Is zero or undefined if name is 7 bit ASCII, 835"no" if the name is 8 bit (and so should only match if SvUTF8() is false), 836"yes" if the name is utf8 encoded. 837 838The internals automatically clone any name with characters 128-255 but none 839256+ (ie one that could be either in bytes or utf8) into a second entry 840which is utf8 encoded. 841 842=item weight 843 844Optional sorting weight for names, to determine the order of 845linear testing when multiple names fall in the same case of a switch clause. 846Higher comes earlier, undefined defaults to zero. 847 848=back 849 850In the argument hashref, I<package> is the name of the package, and is only 851used in comments inside the generated C code. I<subname> defaults to 852C<constant> if undefined. 853 854I<default_type> is the type returned by C<ITEM>s that don't specify their 855type. It defaults to the value of C<default_type()>. I<types> should be given 856either as a comma separated list of types that the C subroutine I<subname> 857will generate or as a reference to a hash. I<default_type> will be added to 858the list if not present, as will any types given in the list of I<ITEM>s. The 859resultant list should be the same list of types that C<XS_constant> is 860given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of 861parameters to the constant function. I<indent> is currently unused and 862ignored. In future it may be used to pass in information used to change the C 863indentation style used.] The best way to maintain consistency is to pass in a 864hash reference and let this function update it. 865 866I<breakout> governs when child functions of I<subname> are generated. If there 867are I<breakout> or more I<ITEM>s with the same length of name, then the code 868to switch between them is placed into a function named I<subname>_I<len>, for 869example C<constant_5> for names 5 characters long. The default I<breakout> is 8703. A single C<ITEM> is always inlined. 871 872=cut 873 874# The parameter now BREAKOUT was previously documented as: 875# 876# I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of 877# this length, and that the constant name passed in by perl is checked and 878# also of this length. It is used during recursion, and should be C<undef> 879# unless the caller has checked all the lengths during code generation, and 880# the generated subroutine is only to be called with a name of this length. 881# 882# As you can see it now performs this function during recursion by being a 883# scalar reference. 884 885sub C_constant { 886 my ($self, $args, @items) = @_; 887 my ($package, $subname, $default_type, $what, $indent, $breakout) = 888 @{$args}{qw(package subname default_type types indent breakout)}; 889 $package ||= 'Foo'; 890 $subname ||= 'constant'; 891 # I'm not using this. But a hashref could be used for full formatting without 892 # breaking this API 893 # $indent ||= 0; 894 895 my ($namelen, $items); 896 if (ref $breakout) { 897 # We are called recursively. We trust @items to be normalised, $what to 898 # be a hashref, and pinch %$items from our parent to save recalculation. 899 ($namelen, $items) = @$breakout; 900 } else { 901 $items = {}; 902 if (is_perl56) { 903 # Need proper Unicode preserving hash keys. 904 require ExtUtils::Constant::Aaargh56Hash; 905 tie %$items, 'ExtUtils::Constant::Aaargh56Hash'; 906 } 907 $breakout ||= 3; 908 $default_type ||= $self->default_type(); 909 if (!ref $what) { 910 # Convert line of the form IV,UV,NV to hash 911 $what = {map {$_ => 1} split /,\s*/, ($what || '')}; 912 # Figure out what types we're dealing with, and assign all unknowns to the 913 # default type 914 } 915 @items = $self->normalise_items ({}, $default_type, $what, $items, @items); 916 # use Data::Dumper; print Dumper @items; 917 } 918 my $params = $self->params ($what); 919 920 # Probably "static int" 921 my ($body, @subs); 922 $body = $self->C_constant_return_type($params) . "\n$subname (" 923 # Eg "pTHX_ " 924 . $self->C_constant_prefix_param_defintion($params) 925 # Probably "const char *name" 926 . $self->name_param_definition($params); 927 # Something like ", STRLEN len" 928 $body .= ", " . $self->namelen_param_definition($params) 929 unless defined $namelen; 930 $body .= $self->C_constant_other_params_defintion($params); 931 $body .= ") {\n"; 932 933 if (defined $namelen) { 934 # We are a child subroutine. Print the simple description 935 my $comment = 'When generated this function returned values for the list' 936 . ' of names given here. However, subsequent manual editing may have' 937 . ' added or removed some.'; 938 $body .= $self->switch_clause ({indent=>2, comment=>$comment}, 939 $namelen, $items, @items); 940 } else { 941 # We are the top level. 942 $body .= " /* Initially switch on the length of the name. */\n"; 943 $body .= $self->dogfood ({package => $package, subname => $subname, 944 default_type => $default_type, what => $what, 945 indent => $indent, breakout => $breakout}, 946 @items); 947 $body .= ' switch ('.$self->namelen_param().") {\n"; 948 # Need to group names of the same length 949 my @by_length; 950 foreach (@items) { 951 push @{$by_length[length $_->{name}]}, $_; 952 } 953 foreach my $i (0 .. $#by_length) { 954 next unless $by_length[$i]; # None of this length 955 $body .= " case $i:\n"; 956 if (@{$by_length[$i]} == 1) { 957 my $only_thing = $by_length[$i]->[0]; 958 if ($only_thing->{utf8}) { 959 if ($only_thing->{utf8} eq 'yes') { 960 # With utf8 on flag item is passed in element 0 961 $body .= $self->match_clause (undef, [$only_thing]); 962 } else { 963 # With utf8 off flag item is passed in element 1 964 $body .= $self->match_clause (undef, [undef, $only_thing]); 965 } 966 } else { 967 $body .= $self->match_clause (undef, $only_thing); 968 } 969 } elsif (@{$by_length[$i]} < $breakout) { 970 $body .= $self->switch_clause ({indent=>4}, 971 $i, $items, @{$by_length[$i]}); 972 } else { 973 # Only use the minimal set of parameters actually needed by the types 974 # of the names of this length. 975 my $what = {}; 976 foreach (@{$by_length[$i]}) { 977 $what->{$_->{type}} = 1; 978 $what->{''} = 1 if $_->{utf8}; 979 } 980 $params = $self->params ($what); 981 push @subs, $self->C_constant ({package=>$package, 982 subname=>"${subname}_$i", 983 default_type => $default_type, 984 types => $what, indent => $indent, 985 breakout => [$i, $items]}, 986 @{$by_length[$i]}); 987 $body .= " return ${subname}_$i (" 988 # Eg "aTHX_ " 989 . $self->C_constant_prefix_param($params) 990 # Probably "name" 991 . $self->name_param($params); 992 $body .= $self->C_constant_other_params($params); 993 $body .= ");\n"; 994 } 995 $body .= " break;\n"; 996 } 997 $body .= " }\n"; 998 } 999 my $notfound = $self->return_statement_for_notfound(); 1000 $body .= " $notfound\n" if $notfound; 1001 $body .= "}\n"; 1002 return (@subs, $body); 1003} 1004 10051; 1006__END__ 1007 1008=back 1009 1010=head1 BUGS 1011 1012Not everything is documented yet. 1013 1014Probably others. 1015 1016=head1 AUTHOR 1017 1018Nicholas Clark <nick@ccl4.org> based on the code in C<h2xs> by Larry Wall and 1019others 1020