#!/usr/bin/perl -w
#
#
# Regenerate (overwriting only if changed):
#
#    pod/perldebguts.pod
#    regnodes.h
#
# from information stored in
#
#    regcomp.sym
#    regexp.h
#    op_reg_common.h
#    regcomp.h
#
# pod/perldebguts.pod is not completely regenerated.  Only the table of
# regexp nodes is replaced; other parts remain unchanged.
#
# Accepts the standard regen_lib -q and -v args.
#
# This script is normally invoked from regen.pl.

BEGIN {
    # Get function prototypes
    require './regen/regen_lib.pl';
}

use strict;

# NOTE I don't think anyone actually knows what all of these properties mean,
# and I suspect some of them are outright unused.  This is a first attempt to
# clean up the generation so maybe one day we can move to something more self
# documenting.  (One might argue that an array of hashes of properties would
# be easier to use.)
#
# Why we use the term regnode and nodes, and not say, opcodes, I am not sure.

# General thoughts:
# 1. We use a single continuum to represent both opcodes and states,
#    and in regexec.c we switch on the combined set.
# 2. Opcodes have more information associated to them, states are simpler,
#    basically just an identifier/number that can be used to switch within
#    the state machine.
# 3. Some opcodes are order dependent.
# 4. Output files often use "tricks" to reduce diff effects.  Some of what
#    we do below is more clumsy looking than it could be because of this.

# Op/state properties:
#
# Property    In      Descr
# ----------------------------------------------------------------------------
# name        Both    Name of op/state
# id          Both    integer value for this opcode/state
# optype      Both    Either 'op' or 'state'
# line_num    Both    line number of the input file for this item.
# type        Op      Type of node (aka regkind)
# code        Op      Argument description; only emitted into the perldebguts
#                     table
# suffix      Op      which regnode struct this uses, so if this is '1', it
#                     uses 'struct regnode_1'
# flags       Op      S for simple; V for varies
# longj       Op      Boolean as to if this node is a longjump
# comment     Both    Comment about node, if any.  Placed in perldebguts
#                     as its description
# pod_comment Both    Special comments for pod output (preceding lines in def)
#                     Such lines begin with '#*'

# Global State
my @all;       # all opcodes/state
my %all;       # hash of all opcode/state names

my @ops;       # array of just opcodes
my @states;    # array of just states

my $longest_name_length= 0;    # track lengths of names for nicer reports
my (%type_alias);              # map the type (??)

# Register a newly constructed node into our state tables.
#
# Takes a single hash reference describing the node.  It must carry a unique
# 'name' and an 'optype' of either "op" or "state"; 'longj', if true, must
# be 1.  On success the node is appended to @all and to @ops or @states,
# indexed by name in %all, and given its 'id' (its position in @all).
#
# Dies on a duplicate name, a missing or unknown optype, or a bad longj
# value.  All checks run before any table is touched, so a rejected node
# never leaves the global state half updated.
sub register_node {
    my ($node)= @_;

    my $name= $node->{name};
    if ( my $previous= $all{$name} ) {
        die "Duplicate item '$name' in regcomp.sym line $node->{line_num} "
            . "previously defined on line $previous->{line_num}\n";
    }

    my $optype= $node->{optype};
    if ( !$optype ) {
        die "must have an optype in node ", Dumper($node);
    }

    # Pick the per-kind list this node belongs on.
    my $kind_list;
    if ( $optype eq "op" ) {
        $kind_list= \@ops;
    }
    elsif ( $optype eq "state" ) {
        $kind_list= \@states;
    }
    else {
        die "Unknown optype '$optype' in ", Dumper($node);
    }

    if ( $node->{longj} && $node->{longj} != 1 ) {
        die "longj field must be in [01] if present in ", Dumper($node);
    }

    # The id is simply the index the node is about to occupy in @all.
    $node->{id}= scalar @all;
    push @$kind_list, $node;
    push @all,        $node;
    $all{$name}= $node;

    return;
}

# Parse and add an opcode definition to the global state.
# What an opcode definition looks like is given in regcomp.sym.
#
# Not every opcode definition has all of the components.  We should maybe make
# this nicer/easier to read in the future.  Also note that the above is tab
# sensitive.
# Special comments for an entry precede it, and begin with '#*' and are placed
# in the generated pod file just before the entry.

# Parse one opcode definition line and register the resulting node.
#
#   $text        - the definition line (trailing whitespace already removed)
#   $line_num    - line number of $text in the input file
#   $pod_comment - accumulated '#*' comment text preceding the entry, if any
#
# Dies if the line does not have the "NAME desc ; comment" shape.
sub parse_opcode_def {
    my ( $text, $line_num, $pod_comment )= @_;
    my $node= {
        line_num    => $line_num,
        pod_comment => $pod_comment,
        optype      => "op",
    };

    # first split the line into three, the initial NAME, a middle part
    # that we call "desc" which contains various (not well documented) things,
    # and a comment section.
    @{$node}{qw(name desc comment)}=
        $text =~ /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/
        or die "Failed to match $text";

    # the content of the "desc" field from the first step is extracted here:
    @{$node}{qw(type code suffix flags longj)}=
        split /[,\s]\s*/, $node->{desc};

    # Components missing from the definition become empty strings.
    $node->{$_} //= "" for qw(type code suffix flags longj);

    register_node($node);    # has to be before the type_alias code below

    if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) {

        #warn "Regop type '$node->{type}' from regcomp.sym line $line_num"
        #     ." is not an existing regop, and will be aliased to $node->{name}\n"
        #    if -t STDERR;
        $type_alias{ $node->{type} }= $node->{name};
    }

    $longest_name_length= length( $node->{name} )
        if length( $node->{name} ) > $longest_name_length;
}

# parse out a state definition and add the resulting data
# into the global state. may create multiple new states from
# a single definition (this is part of the point).
# Format for states:
# REGOP \t typelist [ \t typelist]
# typelist= namelist
#         = namelist:FAIL
#         = name:count
# Eg:
# WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
# BRANCH next:FAIL
# CURLYM A,B:FAIL
#
# The CURLYM definition would create the states:
# CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail
#
# $pod_comment is accepted for symmetry with parse_opcode_def() but is not
# used for states.
sub parse_state_def {
    my ( $text, $line_num, $pod_comment )= @_;
    my ( $type, @lists )= split /\s+/, $text;
    die "No list? $type" if !@lists;
    foreach my $list (@lists) {
        my ( $names, $special )= split /:/, $list, 2;
        $special ||= "";

        # Work out which suffixes every name in this list expands to.
        my @suffix;
        if ( !$special ) {
            @suffix= ("");
        }
        elsif ( $special =~ /\A\d+\z/ ) {

            # name:count  => name1 .. nameCOUNT
            @suffix= ( 1 .. $special );
        }
        elsif ( $special eq 'FAIL' ) {
            @suffix= ( "", "_fail" );
        }
        else {
            die "unknown :type ':$special'";
        }

        foreach my $name ( split /,/, $names ) {

            # 'resume' is the one name that is prefixed rather than suffixed
            my $real=
                $name eq 'resume'
                ? "resume_$type"
                : "${type}_$name";
            foreach my $suffix (@suffix) {
                my $node= {
                    name     => "$real$suffix",
                    optype   => "state",
                    type     => $type || "",
                    comment  => "state for $type",
                    line_num => $line_num,
                };
                register_node($node);
            }
        }
    }
}

# Build the C text for one flag class of opcodes.
#
#   $flag    - the flag letter to select on (compared with each op's 'flags')
#   $varname - base name used for the generated macro and arrays
#   $comment - optional text placed in front of the generated code
#
# Returns a string holding a REGNODE_<VARNAME>() macro, a deprecated
# PL_<varname>[] list of the selected ops, and a PL_<varname>_bitmask[] with
# one bit per op id.
sub process_flags {
    my ( $flag, $varname, $comment )= @_;
    $comment= '' unless defined $comment;

    my @selected;
    my $bitmap= '';
    for my $node (@ops) {
        my $set= $node->{flags} && $node->{flags} eq $flag ? 1 : 0;

        # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
        # ops in the C code.
        my $current= do {
            no warnings;    # the byte may not exist in $bitmap yet
            ord substr $bitmap, ( $node->{id} >> 3 );
        };
        substr( $bitmap, ( $node->{id} >> 3 ), 1 )=
            chr( $current | ( $set << ( $node->{id} & 7 ) ) );

        push @selected, $node->{name} if $set;
    }
    my $out_string= join ', ', @selected, 0;
    $out_string =~ s/(.{1,70},) /$1\n /g;

    my $out_mask= join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))

#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
 $out_string
};
#endif /* DOINIT */

#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
 $out_mask
};
#endif /* DOINIT */
EOP
}

# Emit the isEXACTFish() / isEXACT_REQ8() macros and their two U32 bitmasks
# to $out.  Bit N of each mask describes the op whose id is EXACT + N.
sub print_process_EXACTish {
    my ($out)= @_;

    # Creates some bitmaps for EXACTish nodes.

    my @folded;
    my @req8;

    my $base;
    for my $node (@ops) {
        next unless $node->{type} eq 'EXACT';
        my $name = $node->{name};
        $base = $node->{id} if $name eq 'EXACT';

        # Bit positions are relative to EXACT, so it has to come first.
        die "EXACT-type node '$name' is defined before EXACT itself\n"
            unless defined $base;

        my $index = $node->{id} - $base;

        # This depends entirely on naming conventions in regcomp.sym
        $folded[$index] = $name =~ /^EXACTF/ || 0;
        $req8[$index]   = $name =~ /8/       || 0;
    }

    die "Can't cope with > 32 EXACTish nodes" if @folded > 32;

    my $exactf = sprintf "%X", oct( "0b" . join "", reverse @folded );
    my $req8   = sprintf "%X", oct( "0b" . join "", reverse @req8 );
    print $out <<EOP;

/* Is 'op', known to be of type EXACT, folding? */
#define isEXACTFish(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))

/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
#define isEXACT_REQ8(op) (__ASSERT_(PL_regkind[op] == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))

#ifndef DOINIT
EXTCONST U32 PL_EXACTFish_bitmask;
EXTCONST U32 PL_EXACT_REQ8_bitmask;
#else
EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf;
EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8;
#endif /* DOINIT */
EOP
}

# Read the definition file $file and register every opcode and state in it.
#
# Lines before a line made only of dashes are opcode definitions; lines after
# it are state definitions.  '#*' comments (and blank lines) accumulate into
# a pod comment that is handed to the next definition.  Dies if the file
# cannot be opened or if more than 256 items end up registered.
sub read_definition {
    my ( $file )= @_;
    my ( $seen_sep, $pod_comment )= ( 0, "" );
    open my $in_fh, "<", $file
        or die "Failed to open '$file' for reading: $!";
    while ( my $line= <$in_fh> ) {

        # Special pod comments
        if ( $line =~ /^#\* ?(.*)/s ) { $pod_comment .= "# $1"; }

        # Truly blank lines possibly surrounding pod comments
        elsif ( $line =~ /^\s*$/ ) { $pod_comment .= "\n" }

        next if $line =~ /\A\s*#/ || $line =~ /\A\s*\z/;

        $line =~ s/\s*\z//;
        if ( $line =~ /^-+\s*$/ ) {
            $seen_sep= 1;
            next;
        }

        if ($seen_sep) {
            parse_state_def( $line, $., $pod_comment );
        }
        else {
            parse_opcode_def( $line, $., $pod_comment );
        }
        $pod_comment= "";
    }
    close $in_fh;
    die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all,
        " in file!"
        if @all > 256;
}

# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ( $base_name_width, $rwidth, $twidth )= ( 22, 12, 9 );

# Emit the "#define NAME id" section to $out: REGNODE_MAX and
# REGMATCH_STATE_MAX, the with_t_UTF8ness()/with_tp_UTF8ness() macros, then
# one group of defines per op (plus its type alias, if any) and per state.
# Dies if any name is wider than $base_name_width.
sub print_state_defs {
    my ($out)= @_;
    printf $out <<EOP,
/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP
        -$base_name_width,
        REGNODE_MAX => $#ops,
        -$base_name_width, REGMATCH_STATE_MAX => $#all;

    my %rev_type_alias= reverse %type_alias;

    my $max_name_width = 0;
    for my $node ( @ops, @states ) {
        my $len = length $node->{name};
        $max_name_width = $len if $max_name_width < $len;
    }

    die "Do a white-space only commit to increase \$base_name_width to"
        . " $max_name_width; then re-run" if $base_name_width < $max_name_width;

    print $out <<EOT;
/* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */
#define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8)))
/* -- same, but also with pattern (p8, pb) -- */
#define with_tp_UTF8ness(op, t_utf8, p_utf8) \\
\t\t(((op) << 2) + (cBOOL(t_utf8) << 1) + cBOOL(p_utf8))

/* The #defines below give both the basic regnode and the expanded version for
 switching on utf8ness */
EOT

    for my $node (@ops) {
        print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment});
        if ( defined( my $alias= $rev_type_alias{ $node->{name} } ) ) {
            print_state_def_line($out, $alias, $node->{id}, $node->{comment});
        }
    }

    print $out "\t/* ------------ States ------------- */\n";
    for my $node (@states) {
        print_state_def_line($out, $node->{name}, $node->{id}, $node->{comment});
    }
}

# Print to $fh the group of #defines for one op or state: the base
# "#define NAME id /* 0xHH comment */" line (comment wrapped to 78 columns),
# the two _tb/_t8 variants numbered from id*2, the four _tb_pb .. _t8_p8
# variants numbered from id*4, and a trailing blank line.
sub print_state_def_line
{
    my ($fh, $name, $id, $comment) = @_;

    # The sub-names are like '_tb' or '_tb_p8' = max 6 chars wide
    my $name_col_width = $base_name_width + 6;
    my $base_id_width = 3;  # Max is '255' or 3 cols
    my $mid_id_width  = 3;  # Max is '511' or 3 cols
    my $full_id_width = 3;  # Max is '1023' but not close to using the 4th

    my $line = "#define " . $name;
    $line .= " " x ($name_col_width - length($name));

    $line .= sprintf "%*s", $base_id_width, $id;
    $line .= " " x $mid_id_width;
    $line .= " " x ($full_id_width + 2);

    $line .= "/* ";
    my $hanging = length $line;     # Indent any subsequent line to this pos
    $line .= sprintf "0x%02x", $id;

    my $columns = 78;

    # From the documentation: 'In fact, every resulting line will have length
    # of no more than "$columns - 1"'
    $line = wrap($columns + 1, "", " " x $hanging, "$line $comment");
    chomp $line;            # wrap always adds a trailing \n
    $line =~ s/ \s+ $ //x;  # trim, just in case.

    # The comment may have wrapped.  Find the final \n and measure the length
    # to the end.  If it is short enough, just append the ' */' to the line.
    # If it is too close to the end of the space available, add an extra line
    # that consists solely of blanks and the ' */'
    if (length($line) - rindex($line, "\n") - 1 <= $columns - 3) {
        $line .= " */\n";
    }
    else {
        $line .= "\n" . " " x ($hanging - 3) . "*/\n";
    }

    print $fh $line;

    # And add the 2 subsidiary #defines used when switching on
    # with_t_UTF8ness()
    my $with_id_t = $id * 2;
    for my $with (qw(tb t8)) {
        my $with_name = "${name}_$with";
        print $fh "#define ", $with_name;
        print $fh " " x ($name_col_width - length($with_name) + $base_id_width);
        printf $fh "%*s", $mid_id_width, $with_id_t;
        print $fh " " x $full_id_width;
        printf $fh " /*";
        print $fh " " x (4 + 2);  # 4 is width of 0xHH that the base entry uses
        printf $fh "0x%03x */\n", $with_id_t;

        $with_id_t++;
    }

    # Finally add the 4 subsidiary #defines used when switching on
    # with_tp_UTF8ness()
    my $with_id_tp = $id * 4;
    for my $with (qw(tb_pb tb_p8 t8_pb t8_p8)) {
        my $with_name = "${name}_$with";
        print $fh "#define ", $with_name;
        print $fh " " x ($name_col_width - length($with_name) + $base_id_width + $mid_id_width);
        printf $fh "%*s", $full_id_width, $with_id_tp;
        printf $fh " /*";
        print $fh " " x (4 + 2);  # 4 is width of 0xHH that the base entry uses
        printf $fh "0x%03x */\n", $with_id_tp;

        $with_id_tp++;
    }

    print $fh "\n"; # Blank line separates groups for clarity
}

# Emit the PL_regkind[] table to $out: one entry per op and state giving its
# type, with a separator comment between the ops and the states.
sub print_regkind {
    my ($out)= @_;
    print $out <<EOP;

/* PL_regkind[] What type of regop or state is this. */

#ifndef DOINIT
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
EOP
    # Loaded at compile time; this is the import that makes Dumper()
    # available to the rest of the file as well.
    use Data::Dumper;
    foreach my $node (@all) {

        # Debugging aid: dump any node lacking a type or a name
        print Dumper($node) if !defined $node->{type} or !defined( $node->{name} );
        printf $out "\t%*s\t/* %*s */\n",
            -1 - $twidth, "$node->{type},", -$base_name_width, $node->{name};
        print $out "\t/* ------------ States ------------- */\n"
            if $node->{id} == $#ops and $node->{id} != $#all;
    }

    print $out <<EOP;
};
#endif
EOP
}

# Print to $out an "#ifdef $token ... #endif" pair wrapped around whatever
# the remaining arguments (code refs, each called with $out) print.
sub wrap_ifdef_print {
    my $out= shift;
    my $token= shift;
    print $out <<EOP;

#ifdef $token
EOP
    $_->($out) for @_;
    print $out <<EOP;
#endif /* $token */

EOP
}

# Emit the regarglen[] table to $out: for each op, 0 or
# EXTRA_SIZE(struct regnode_<suffix>) when the op has a suffix.
sub print_regarglen {
    my ($out)= @_;
    print $out <<EOP;

/* regarglen[] - How large is the argument part of the node (in regnodes) */

static const U8 regarglen[] = {
EOP

    foreach my $node (@ops) {
        my $size= 0;
        $size= "EXTRA_SIZE(struct regnode_$node->{suffix})" if $node->{suffix};

        printf $out "\t%*s\t/* %*s */\n", -37, "$size,", -$rwidth, $node->{name};
    }

    print $out <<EOP;
};
EOP
}

# Emit the reg_off_by_arg[] table to $out: each op's 'longj' value, or 0.
sub print_reg_off_by_arg {
    my ($out)= @_;
    print $out <<EOP;

/* reg_off_by_arg[] - Which argument holds the offset to the next node */

static const char reg_off_by_arg[] = {
EOP

    foreach my $node (@ops) {
        my $size= $node->{longj} || 0;

        printf $out "\t%d,\t/* %*s */\n", $size, -$rwidth, $node->{name};
    }

    print $out <<EOP;
};

EOP
}

# Emit the PL_reg_name[] table to $out: the name of every op and state as a
# C string.  Each entry's comment carries its id; for states the id is shown
# relative to the last op, as "REGNODE_MAX +n".
sub print_reg_name {
    my ($out)= @_;
    print $out <<EOP;

/* reg_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_name[];
#else
EXTCONST char * const PL_reg_name[] = {
EOP

    my $ofs= 0;
    my $sym= "";
    foreach my $node (@all) {
        printf $out "\t%*s\t/* $sym%#04x */\n",
            -3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs;
        if ( $node->{id} == $#ops and @ops != @all ) {
            print $out "\t/* ------------ States ------------- */\n";
            $ofs= $#ops;
            $sym= 'REGNODE_MAX +';
        }
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
}

# Emit the PL_reg_extflags_name[] table (one name per bit, 0 .. 31) and
# REG_EXTFLAGS_NAME_SIZE to $out.  The names come from the "#define RXf_..."
# lines of op_reg_common.h and regexp.h.  Dies if a file cannot be read or if
# two differently named symbols claim the same bits.
sub print_reg_extflags_name {
    my ($out)= @_;
    print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

    my %rxfv;
    my %definitions;    # Remember what the symbol definitions are
    my $val= 0;
    my %reverse;
    my $REG_EXTFLAGS_NAME_SIZE= 0;
    foreach my $file ( "op_reg_common.h", "regexp.h" ) {
        open my $in_fh, "<", $file or die "Can't read '$file': $!";
        while (<$in_fh>) {

            # optional leading '_'.  Return symbol in $1, and strip it from
            # comment of line.  Currently doesn't handle comments running onto
            # next line
            if (s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi) {
                chomp;
                my $define= $1;
                my $orig= $_;
                s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank

                # Replace any prior defined symbols by their values
                foreach my $key ( keys %definitions ) {
                    s/\b\Q$key\E\b/$definitions{$key}/g;
                }

                # Remove the U suffix from unsigned int literals
                s/\b([0-9]+)U\b/$1/g;

                # NOTE(review): a definition that is not valid Perl after the
                # substitutions above silently evaluates to undef here.
                my $newval= eval $_;    # Get numeric definition

                $definitions{$define}= $newval;

                next unless $_ =~ /<</;    # Bit defines use left shift
                if ( $val & $newval ) {
                    my @names= ( $define, $reverse{$newval} );
                    s/PMf_// for @names;
                    if ( $names[0] ne $names[1] ) {
                        die sprintf
                            "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)",
                            $newval, $orig, $_;
                    }
                    next;
                }
                $val |= $newval;
                $rxfv{$define}= $newval;
                $reverse{$newval}= $define;
            }
        }
        close $in_fh;
    }
    my %vrxf= reverse %rxfv;
    printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N',
        $val;
    my %multibits;
    for my $bit ( 0 .. 31 ) {
        my $power_of_2= 2**$bit;
        my $n= $vrxf{$power_of_2};
        my $extra= "";
        if ( !$n ) {

            # Here, there was no name that matched exactly the bit.  It could be
            # either that it is unused, or the name matches multiple bits.
            if ( !( $val & $power_of_2 ) ) {
                $n= "UNUSED_BIT_$bit";
            }
            else {

                # Here, must be because it matches multiple bits.  Look through
                # all possibilities until find one that matches this one.  Use
                # that name, and all the bits it matches.  The names are
                # visited in sorted order so that the generated file does not
                # depend on hash ordering.
                foreach my $name ( sort keys %rxfv ) {
                    if ( $rxfv{$name} & $power_of_2 ) {
                        $n= $name . ( $multibits{$name}++ );
                        $extra= sprintf qq{ : "%s" - 0x%08x}, $name,
                            $rxfv{$name}
                            if $power_of_2 != $rxfv{$name};
                        last;
                    }
                }
            }
        }
        s/\bRXf_(PMf_)?// for $n, $extra;
        printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra;
        $REG_EXTFLAGS_NAME_SIZE++;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

#ifdef DEBUGGING
# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
#endif
EOP

}

# Emit the PL_reg_intflags_name[] table and REG_INTFLAGS_NAME_SIZE to $out,
# one entry per "#define PREGf_<NAME> 0x<hex>" line found in regcomp.h, in
# file order.  Dies if regcomp.h cannot be read.
sub print_reg_intflags_name {
    my ($out)= @_;
    print $out <<EOP;

/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
EOP

    my $REG_INTFLAGS_NAME_SIZE= 0;
    foreach my $file ("regcomp.h") {
        open my $fh, "<", $file or die "Can't read $file: $!";
        while (<$fh>) {

            # Capture the full symbol in $1, the part after 'PREGf_' in $2,
            # the hex digits in $3 and the text of a trailing comment on the
            # same line, if any, in $4.
            if (
                m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi
                )
            {
                my $define= $1;
                my $abbr= $2;
                my $flag_value= hex($3);
                my $comment= $4;
                $comment= $comment ? " - $comment" : "";

                printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
                    $flag_value, $define, $comment;
                $REG_INTFLAGS_NAME_SIZE++;
            }
        }
        close $fh;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
    print $out <<EOQ;
#ifdef DEBUGGING
# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
#endif

EOQ
}

# Emit to $out the process_flags() output for the 'V' (varies) and 'S'
# (simple) flag classes, each preceded by its explanatory C comment.
sub print_process_flags {
    my ($out)= @_;

    print $out process_flags( 'V', 'varies', <<'EOC');
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC

    print $out process_flags( 'S', 'simple', <<'EOC');

/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC

}

# Rewrite pod/perldebguts.pod, replacing only the regnode table that sits
# between the "=for regcomp.pl begin" and "=for regcomp.pl end" markers with
# one freshly generated from @ops.  Everything outside the markers is copied
# through from the existing file.
sub do_perldebguts {
    my $guts= open_new( 'pod/perldebguts.pod', '>' );

    # $node and $code are referenced by the format compiled below, so they
    # must be declared before the eval and keep these names.
    my $node;
    my $code;
    my $name_fmt= '<' x ( $longest_name_length - 1 );
    my $descr_fmt= '<' x ( 58 - $longest_name_length );

    # The picture widths depend on run-time values, hence the string eval.
    eval <<EOD or die $@;
format GuTS =
 ^*~~
 \$node->{pod_comment}
 ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
 \$node->{name}, \$code, defined \$node->{comment} ? \$node->{comment} : ''
.
1;
EOD

    my $old_fh= select($guts);
    $~= "GuTS";

    # Copy the old file up to and including the begin marker
    open my $oldguts, '<', 'pod/perldebguts.pod'
        or die "$0 cannot open pod/perldebguts.pod for reading: $!";
    while (<$oldguts>) {
        print;
        last if /=for regcomp.pl begin/;
    }

    print <<'END_OF_DESCR';

 # TYPE arg-description [regnode-struct-suffix] [longjump-len] DESCRIPTION
END_OF_DESCR
    for my $n (@ops) {
        $node= $n;
        $code= "$node->{code} " . ( $node->{suffix} || "" );
        $code .= " $node->{longj}" if $node->{longj};
        if ( $node->{pod_comment} ||= "" ) {

            # Trim multiple blanks
            $node->{pod_comment} =~ s/^\n\n+/\n/;
            $node->{pod_comment} =~ s/\n\n+$/\n\n/;
        }
        write;
    }
    print "\n";

    # Skip the old table ...
    while (<$oldguts>) {
        last if /=for regcomp.pl end/;
    }

    # ... then copy the rest.  The do-while form prints the current $_ (the
    # line the loop above stopped on) before reading any further.
    do { print } while <$oldguts>;    #win32 can't unlink an open FH
    close $oldguts or die "Error closing pod/perldebguts.pod: $!";
    select $old_fh;
    close_and_rename($guts);
}

my $confine_to_core = 'defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD)';
read_definition("regcomp.sym");
my $out= open_new( 'regnodes.h', '>',
    { by => 'regen/regcomp.pl', from => 'regcomp.sym' } );
print $out "#if $confine_to_core\n\n";
print_state_defs($out);
print_regkind($out);
wrap_ifdef_print(
    $out,
    "REG_COMP_C",
    \&print_regarglen,
    \&print_reg_off_by_arg
);
print_reg_name($out);
print_reg_extflags_name($out);
print_reg_intflags_name($out);
print_process_flags($out);
print_process_EXACTish($out);
print $out "\n#endif /* $confine_to_core */\n";
read_only_bottom_close_and_rename($out);

do_perldebguts();